RvSpectML · eford · Dec 17, 2024 · Jul 14, 2025 · Aug 15, 2025 · Aug 21, 2025
diff --git a/Project.toml b/Project.toml
@@ -1,14 +1,15 @@
 name = "StellarSpectraObservationFitting"
 uuid = "6b1189bd-9150-44f8-8055-f6ca7d16451e"
-authors = ["Chistian Gilbertson <chistianjgilbertson@gmail.com>"]
-version = "0.1.3"
+authors = ["Christian Gilbertson <chistianjgilbertson@gmail.com>", "Kristo Ment <kristo.ment@gmail.com>"]
+version = "0.2.0"
 
 [deps]
 AbstractGPs = "99985d1d-32ba-4be9-9821-2ec096f28918"
 DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2"
 DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 ExpectationMaximizationPCA = "20521544-8895-46ad-ab44-5135df82bd8f"
+FITSIO = "525bcba6-941b-5504-bd06-fd0dc1a4d2eb"
 KernelFunctions = "ec8451be-7e33-11e9-00cf-bbf324bd1392"
 LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

diff --git a/examples/_lsf.jl b/examples/_lsf.jl
@@ -61,7 +61,8 @@ module NEIDLSF
         return ans
     end
 
-    function neid_lsf(order::Int, log_λ_neid_order::AbstractVector, log_λ_obs::AbstractVector)
+    # KM: Adding the option to extrapolate
+    function neid_lsf(order::Int, log_λ_neid_order::AbstractVector, log_λ_obs::AbstractVector; extrapolate::Bool=false)
         @assert 1 <= order <= length(no_lsf_orders)
         if no_lsf_orders[order]; return nothing end
         n = length(log_λ_obs)
@@ -72,7 +73,7 @@ module NEIDLSF
         pixel_separation_ratio = SSOF.simple_derivative(log_λ_neid_order) ./ pixel_separation_log_λ_obs.(log_λ_neid_order)
         # make the linear_interpolation object and evaluate it
         # converter(vals) = linear_interpolation(log_λ_neid_order, pixel_separation_ratio .* vals; extrapolation_bc=Line())(log_λ_obs)
-        converter(vals) = (DataInterpolations.LinearInterpolation(pixel_separation_ratio .* vals, log_λ_neid_order)).(log_λ_obs)
+        converter(vals) = (DataInterpolations.LinearInterpolation(pixel_separation_ratio .* vals, log_λ_neid_order; extrapolate=extrapolate)).(log_λ_obs)
         σs_converted = converter(σs[:, order])
         bhws_converted = converter(bhws[:, order])
         threeish_sigma_converted = converter(threeish_sigma.(σs[:, order], bhws[:, order]))

diff --git a/src/StellarSpectraObservationFitting.jl b/src/StellarSpectraObservationFitting.jl
@@ -14,5 +14,6 @@ include("Nabla_extension.jl")
 include("prior_gp_functions.jl")
 include("rassine.jl")
 include("error_estimation.jl")
+include("telluric_functions.jl")
 
 end # module
diff --git a/src/error_estimation.jl b/src/error_estimation.jl
@@ -79,12 +79,12 @@ function estimate_σ_curvature_helper_finalizer!(σs::AbstractVecOrMat, _ℓs::A
 	# fit a parabola (or line if using gradient) to `_ℓs` and convert to uncertainties
 	if use_gradient
 		poly_f = ordinary_lst_sq_f(_ℓs, 1; x=x_test)
-		σs[i] = sqrt(1 / poly_f.w[2])
+		σs[i] = poly_f.w[2] > 0 ? sqrt(1 / poly_f.w[2]) : 0			# KM: added check for negative values
 		max_dif = maximum(abs.((poly_f.(x_test)./_ℓs) .- 1))
 		if verbose; println("∇_$i: $(poly_f.w[1] + poly_f.w[2] * x[i])") end
 	else
 		poly_f = ordinary_lst_sq_f(_ℓs, 2; x=x_test)
-		σs[i] = sqrt(1 / (2 * poly_f.w[3]))
+		σs[i] = poly_f.w[3] > 0 ? sqrt(1 / (2 * poly_f.w[3])) : 0	# KM: added check for negative values
 		max_dif = maximum(abs.((poly_f.(x_test)./_ℓs) .- 1))
 		if verbose; println("∇_$i: $(poly_f.w[2] + 2 * poly_f.w[3] * x[i])") end
 	end

diff --git a/src/model_functions.jl b/src/model_functions.jl
diff --git a/src/optimization_functions.jl b/src/optimization_functions.jl
diff --git a/src/regularization_functions.jl b/src/regularization_functions.jl
@@ -164,8 +164,10 @@ end
 
 
 _key_list = [:GP_μ, :L2_μ, :L1_μ, :L1_μ₊_factor, :GP_M, :L2_M, :L1_M, :shared_M]
+_key_list_μ = [:GP_μ, :L2_μ, :L1_μ, :L1_μ₊_factor]
 _key_list_fit = [:GP_μ, :L2_μ, :L1_μ, :GP_M, :L2_M, :L1_M]
 _key_list_bases = [:GP_M, :L2_M, :L1_M, :shared_M]
+_key_list_model = [:GP_μ, :L2_μ, :L1_μ, :L2_h2o, :L1_h2o, :L2_oxy, :L1_oxy]
 
 
 """
@@ -179,27 +181,38 @@ function check_for_valid_regularization(reg::Dict{Symbol, <:Real})
     end
 end
 
-min_reg = 1e-3
-max_reg = 1e12
+min_reg = 1e-1 #1e-3
+max_reg = 1e7 #1e12
+min_reg_model = 1e0
+max_reg_model = 1e12
 
 
 """
     fit_regularization!(mws, testing_inds; key_list=_key_list_fit, share_regs=false, kwargs...)
 
 Fit all of the regularization values in `key_list` for the model in `mws`
 """
-function fit_regularization!(mws::ModelWorkspace, testing_inds::AbstractVecOrMat; key_list::Vector{Symbol}=_key_list_fit, share_regs::Bool=false, kwargs...)
+function fit_regularization!(mws::ModelWorkspace, testing_inds::AbstractVecOrMat; key_list::Vector{Symbol}=_key_list_fit, share_regs::Bool=false, ignore_μ_tel::Bool=false, kwargs...)
     om = mws.om
     n_obs = size(mws.d.flux, 2)
     training_inds = [i for i in 1:n_obs if !(i in testing_inds)]
     check_for_valid_regularization(om.reg_tel)
     check_for_valid_regularization(om.reg_star)
     if share_regs; @assert keys(om.reg_tel) == keys(om.reg_star) end
+    if share_regs; @assert !ignore_μ_tel end
     hold_tel = copy(default_reg_star_full)
     hold_star = copy(default_reg_tel_full)
+    initial_tel = copy(om.reg_tel)
     copy_dict!(hold_tel, om.reg_tel)
     copy_dict!(hold_star, om.reg_star)
     zero_regularization(om)
+    if ignore_μ_tel
+        for key in _key_list_μ
+            if key in keys(initial_tel)
+                om.reg_tel[key] = initial_tel[key]
+            end
+        end
+    end
     println("starting regularization searches")
     before_ℓ = _eval_regularization(copy(mws.om), mws, training_inds, testing_inds)
     println("initial training χ²: $before_ℓ")
@@ -215,28 +228,57 @@ function fit_regularization!(mws::ModelWorkspace, testing_inds::AbstractVecOrMat
             if (!(key in _key_list_bases)) || is_time_variable(om.star)
                 before_ℓ = fit_regularization_helper!([:reg_star], key, before_ℓ, mws, training_inds, testing_inds, test_factor, reg_min, reg_max; start=hold_star[key], kwargs...)
             end
-            if (!(key in _key_list_bases)) || is_time_variable(om.tel)
+            if ((!(key in _key_list_bases)) || is_time_variable(om.tel)) && !((key in _key_list_μ) && ignore_μ_tel)
                 before_ℓ = fit_regularization_helper!([:reg_tel], key, before_ℓ, mws, training_inds, testing_inds, test_factor, reg_min, reg_max; start=hold_tel[key], kwargs...)
             end
         end
     end
 end
 
 
+"""
+    fit_regularization_model!(mws, testing_inds; key_list=_key_list_model, kwargs...)
+
+Fit all of the regularization values in `key_list` for `mws.om.reg_model`
+"""
+function fit_regularization_model!(mws::ModelWorkspace, testing_inds::AbstractVecOrMat; key_list::Vector{Symbol}=_key_list_model, kwargs...)
+    om = mws.om
+    n_obs = size(mws.d.flux, 2)
+    training_inds = [i for i in 1:n_obs if !(i in testing_inds)]
+    hold_reg = copy(om.reg_model)
+    for (key, value) in om.reg_model
+        om.reg_model[key] = 0
+    end
+    println("starting regularization searches for mws.om.reg_model")
+    before_ℓ = _eval_regularization(copy(mws.om), mws, training_inds, testing_inds)
+    println("initial training χ²: $before_ℓ")
+    test_factor, reg_min, reg_max = 10, min_reg_model, max_reg_model
+    for key in key_list
+        if haskey(hold_reg, key)
+            before_ℓ = fit_regularization_helper!([:reg_model], key, before_ℓ, mws, training_inds, testing_inds, test_factor, reg_min, reg_max; start=hold_reg[key], kwargs...)
+        end
+    end
+end
+
+
 
 """
     fit_regularization!(mws; verbose=true, testing_ratio=0.33, careful_first_step=true, speed_up=false, kwargs...)
 
 Find the best fit model without regularization then fit all of the regularization values in `key_list` for the model in `mws`
 """
-function fit_regularization!(mws::ModelWorkspace; verbose::Bool=true, testing_ratio::Real=0.33, careful_first_step::Bool=true, speed_up::Bool=false, kwargs...)
+function fit_regularization!(mws::ModelWorkspace; verbose::Bool=true, testing_ratio::Real=0.33, careful_first_step::Bool=true, speed_up::Bool=false,
+                             fit_reg_model::Bool=true, kwargs...)
 	# if mws.om.metadata[:todo][:reg_improved]
     n_obs = size(mws.d.flux, 2)
     train_OrderModel!(mws; verbose=verbose, ignore_regularization=true, careful_first_step=careful_first_step, speed_up=speed_up)
     n_obs_test = Int(round(testing_ratio * n_obs))
     test_start_ind = max(1, Int(round(rand() * (n_obs - n_obs_test))))
     testing_inds = test_start_ind:test_start_ind+n_obs_test-1
     fit_regularization!(mws, testing_inds; kwargs...)
+    if mws.om.metadata[:tel_prior] && fit_reg_model
+        fit_regularization_model!(mws, testing_inds; kwargs...)
+    end
     mws.om.metadata[:todo][:reg_improved] = true
 	# end
 end
diff --git a/src/telluric_functions.jl b/src/telluric_functions.jl
@@ -0,0 +1,122 @@
+using FITSIO
+using DataInterpolations
+
+teldir = "/home/kment/RV/telluric"      # Directory containing telluric model spectra
+
+get_h2o_spectrum(λ_values) = interp_telluric_file("$teldir/kpno_telluric_4.0_0.0_h2o_only.fits", λ_values)
+get_oxy_spectrum(λ_values) = interp_telluric_file("$teldir/kpno_telluric_4.0_0.0_oxy_only.fits", λ_values)
+get_tel_spectrum(λ_values) = interp_telluric_file("$teldir/kpno_telluric_4.0_0.0.fits", λ_values)
+
+"""
+    get_telluric_model(λ_values, pwv, z_angle; verbose=true)
+
+Estimate the telluric spectrum at a range of wavelenths (λ_values) for a specific zenith angle
+(z_angle, in degrees) and precipitable water vapor (pwv) value.
+"""
+function get_telluric_model(λ_values, pwv, z_angle; verbose::Bool=true)
+
+    @assert pwv <= 16 "Precipitable water vapor cannot exceed 16."
+    @assert z_angle < 60 "Zenith angle must be below 60."
+
+    pwv_files = [1, 2, 4, 8, 16]
+    z_files = [0, 34, 44, 51, 60]
+    i_pwv = 1   # Index of PWV file
+    i_z = 1     # Index of zenith angle file
+
+    for (i, x) in enumerate(pwv_files)
+        if pwv > x
+            i_pwv = i + 1
+        end
+    end
+    for (i, x) in enumerate(z_files)
+        if z_angle >= x
+            i_z = i
+        end
+    end
+
+    fn_model1 = "$teldir/kpno_telluric_$(pwv_files[i_pwv-1]).0_$(z_files[i_z]).0.fits"
+    fn_model2 = "$teldir/kpno_telluric_$(pwv_files[i_pwv]).0_$(z_files[i_z]).0.fits"
+    fn_model3 = "$teldir/kpno_telluric_$(pwv_files[i_pwv-1]).0_$(z_files[i_z+1]).0.fits"
+    fn_model4 = "$teldir/kpno_telluric_$(pwv_files[i_pwv]).0_$(z_files[i_z+1]).0.fits"
+    if verbose
+        println("Using models: $fn_model1 $fn_model2 $fn_model3 $fn_model4")
+    end
+
+    t_model1 = interp_telluric_file(fn_model1, λ_values)
+    t_model2 = interp_telluric_file(fn_model2, λ_values)
+    t_model3 = interp_telluric_file(fn_model3, λ_values)
+    t_model4 = interp_telluric_file(fn_model4, λ_values)
+
+    X = sec(z_angle * π / 180)          # Airmass
+    X_model12 = sec(z_files[i_z] * π / 180)
+    X_model34 = sec(z_files[i_z+1] * π / 180)
+    X_power12 = X / X_model12
+    X_power34 = X / X_model34
+    pwv_power12 = (pwv - pwv_files[i_pwv-1]) / (pwv_files[i_pwv] - pwv_files[i_pwv-1]) * X_power12
+    t_scaled12 = t_model1 .^ (X_power12 - pwv_power12) .* t_model2 .^ pwv_power12
+    pwv_power34 = (pwv - pwv_files[i_pwv-1]) / (pwv_files[i_pwv] - pwv_files[i_pwv-1]) * X_power34
+    t_scaled34 = t_model3 .^ (X_power34 - pwv_power34) .* t_model4 .^ pwv_power34
+
+    weight = (X - X_model12) / (X_model34 - X_model12)
+    t_scaled = t_scaled12 .* weight .+ t_scaled34 .* (1 - weight)
+
+    return t_scaled
+
+end
+
+"""
+    get_telluric_prior_spectrum(λ_values, species::Symbol=:all)
+
+Evaluate the spectrum used as a telluric prior at a range of wavelenths (λ_values).
+"""
+function get_telluric_prior_spectrum(λ_values, species::Symbol=:all)
+
+    if species == :all
+        return get_tel_spectrum(λ_values)
+    elseif species == :h2o
+        return get_h2o_spectrum(λ_values)
+    elseif species == :oxy
+        return get_oxy_spectrum(λ_values)
+    end
+
+end
+
+"""
+    interp_telluric_file(filename, λ_values)
+
+Estimate the telluric spectrum at a range of wavelenths (λ_values) by interpolating a model spectrum
+in a given FITS file.
+"""
+function interp_telluric_file(filename, λ_values)
+
+    f = FITS(filename)
+    λ_all = reverse(read(f[2], "wavelength")) .* 10
+    t_all = reverse(read(f[2], "transmittance"))
+
+    interp = LinearInterpolation(t_all, λ_all)
+    t_model = interp.(λ_values)
+    t_model[t_model .< 0] .= 0
+
+    return t_model
+
+end
+
+"""
+    simulate_telluric_data(λ_values, pwv_values, z_values)
+
+Simulate the telluric spectrum at a range of wavelenths (λ_values) for a list of precipitable water
+vapor values (pwv_values) and zenith angle values (z_values, in degrees).
+"""
+function simulate_telluric_data(λ_values, pwv_values, z_values)
+
+    @assert length(pwv_values) == length(z_values) "Must provide an equal number of PWV and zenith angle values."
+
+    t_all = Matrix{Float64}(undef, length(λ_values), length(pwv_values))
+
+    for i in range(1, length(pwv_values))
+        t_all[:,i] = get_telluric_model(λ_values, pwv_values[i], z_values[i]; verbose=false)
+    end
+
+    return t_all
+
+end