diff --git a/include/blas_quda.h b/include/blas_quda.h index 4c2b573f70..face9f56c2 100644 --- a/include/blas_quda.h +++ b/include/blas_quda.h @@ -202,8 +202,7 @@ namespace quda { } /** - @brief Compute the block "caxpy" with over the s -et of + @brief Compute the block "caxpy" with over the set of ColorSpinorFields. E.g., it computes y = x * a + y diff --git a/include/dirac_quda.h b/include/dirac_quda.h index 6884072332..f5f3af9979 100644 --- a/include/dirac_quda.h +++ b/include/dirac_quda.h @@ -1300,7 +1300,7 @@ namespace quda { @brief Return the one-hop field for staggered operators for MG setup @return Gauge field - */ + */ virtual cudaGaugeField *getStaggeredShortLinkField() const { return gauge; } /** diff --git a/include/enum_quda.h b/include/enum_quda.h index cffea9c7c3..df85a49223 100644 --- a/include/enum_quda.h +++ b/include/enum_quda.h @@ -197,12 +197,12 @@ typedef enum QudaResidualType_s { QUDA_INVALID_RESIDUAL = QUDA_INVALID_ENUM } QudaResidualType; -// Which basis to use for CA algorithms -typedef enum QudaCABasis_s { +// Which basis to use for polynomials, CA solver basis +typedef enum QudaPolynomialBasis_s { QUDA_POWER_BASIS, QUDA_CHEBYSHEV_BASIS, QUDA_INVALID_BASIS = QUDA_INVALID_ENUM -} QudaCABasis; +} QudaPolynomialBasis; // Whether the preconditioned matrix is (1-k^2 Deo Doe) or (1-k^2 Doe Deo) // @@ -446,17 +446,15 @@ typedef enum QudaDeflatedGuess_s { QUDA_DEFLATED_GUESS_INVALID = QUDA_INVALID_ENUM } QudaDeflatedGuess; -typedef enum QudaComputeNullVector_s { - QUDA_COMPUTE_NULL_VECTOR_NO, - QUDA_COMPUTE_NULL_VECTOR_YES, - QUDA_COMPUTE_NULL_VECTOR_INVALID = QUDA_INVALID_ENUM -} QudaComputeNullVector; - -typedef enum QudaSetupType_s { - QUDA_NULL_VECTOR_SETUP, - QUDA_TEST_VECTOR_SETUP, - QUDA_INVALID_SETUP_TYPE = QUDA_INVALID_ENUM -} QudaSetupType; +typedef enum QudaNullVectorSetupType_s { + QUDA_SETUP_NULL_VECTOR_INVERSE_ITERATIONS, + QUDA_SETUP_NULL_VECTOR_CHEBYSHEV_FILTER, + QUDA_SETUP_NULL_VECTOR_EIGENVECTORS, + QUDA_SETUP_NULL_VECTOR_TEST_VECTORS, + QUDA_SETUP_NULL_VECTOR_RESTRICT_FINE, + QUDA_SETUP_NULL_VECTOR_FREE_FIELD, + QUDA_SETUP_NULL_VECTOR_INVALID = QUDA_INVALID_ENUM +} QudaNullVectorSetupType; typedef enum QudaTransferType_s { QUDA_TRANSFER_AGGREGATE, diff --git a/include/enum_quda_fortran.h b/include/enum_quda_fortran.h index 5e17a9df8f..1fafd83091 100644 --- a/include/enum_quda_fortran.h +++ b/include/enum_quda_fortran.h @@ -181,7 +181,7 @@ #define QUDA_HEAVY_QUARK_RESIDUAL 4 #define QUDA_INVALID_RESIDUAL QUDA_INVALID_ENUM -#define QudaCABasis integer(4) +#define QudaPolynomialBasis integer(4) #define QUDA_POWER_BASIS 0 #define QUDA_CHEBYSHEV_BASIS 1 #define QUDA_INVALID_BASIS QUDA_INVALID_ENUM @@ -406,15 +406,14 @@ #define QUDA_USE_INIT_GUESS_YES 1 #define QUDA_USE_INIT_GUESS_INVALID QUDA_INVALID_ENUM -#define QudaComputeNullVector integer(4) -#define QUDA_COMPUTE_NULL_VECTOR_NO 0 -#define QUDA_COMPUTE_NULL_VECTOR_YES 1 -#define QUDA_COMPUTE_NULL_VECTOR_INVALID QUDA_INVALID_ENUM - -#define QudaSetupType integer(4) -#define QUDA_NULL_VECTOR_SETUP 0 -#define QUDA_TEST_VECTOR_SETUP 1 -#define QUDA_INVALID_SETUP_TYPE QUDA_INVALID_ENUM +#define QudaNullVectorSetupType integer(4) +#define QUDA_SETUP_NULL_VECTOR_INVERSE_ITERATIONS 0 +#define QUDA_SETUP_NULL_VECTOR_CHEBYSHEV_FILTER 1 +#define QUDA_SETUP_NULL_VECTOR_EIGENVECTOES 2 +#define QUDA_SETUP_NULL_VECTOR_TEST_VECTORS 3 +#define QUDA_SETUP_NULL_VECTOR_RESTRICT_FINE 4 +#define QUDA_SETUP_NULL_VECTOR_FREE_FIELD 5 +#define QUDA_SETUP_NULL_VECTOR_INVALID QUDA_INVALID_ENUM #define QudaTransferType integer(4) #define QUDA_TRANSFER_AGGREGATE 0 diff --git a/include/invert_quda.h b/include/invert_quda.h index b17b41ac58..a9a33d8321 100644 --- a/include/invert_quda.h +++ b/include/invert_quda.h @@ -60,8 +60,8 @@ namespace quda { /**< Whether to use an initial guess in the solver or not */ QudaUseInitGuess use_init_guess; - /**< Whether to solve linear system with zero RHS */ - QudaComputeNullVector compute_null_vector; + /**< Whether or not to allow a zero RHS solve for near-null vector generation */ + bool compute_null_vector; /**< Reliable update tolerance */ double delta; @@ -202,7 +202,7 @@ namespace quda { double omega; /** Basis for CA algorithms */ - QudaCABasis ca_basis; + QudaPolynomialBasis ca_basis; /** Minimum eigenvalue for Chebyshev CA basis */ double ca_lambda_min; @@ -211,7 +211,7 @@ namespace quda { double ca_lambda_max; // -1 -> power iter generate /** Basis for CA algorithms in a preconditioner */ - QudaCABasis ca_basis_precondition; + QudaPolynomialBasis ca_basis_precondition; /** Minimum eigenvalue for Chebyshev CA basis in a preconditioner */ double ca_lambda_min_precondition; @@ -269,7 +269,7 @@ namespace quda { Default constructor */ SolverParam() : - compute_null_vector(QUDA_COMPUTE_NULL_VECTOR_NO), + compute_null_vector(false), compute_true_res(true), sloppy_converge(false), verbosity_precondition(QUDA_SILENT), @@ -291,7 +291,7 @@ namespace quda { residual_type(param.residual_type), deflate(param.eig_param != 0), use_init_guess(param.use_init_guess), - compute_null_vector(QUDA_COMPUTE_NULL_VECTOR_NO), + compute_null_vector(false), delta(param.reliable_delta), use_alternative_reliable(param.use_alternative_reliable), use_sloppy_partial_accumulator(param.use_sloppy_partial_accumulator), @@ -708,40 +708,6 @@ namespace quda { */ void setRecomputeEvals(bool flag) { recompute_evals = flag; }; - /** - @brief Compute power iterations on a Dirac matrix - @param[in] diracm Dirac matrix used for power iterations - @param[in] start Starting rhs for power iterations; value preserved unless it aliases tempvec1 or tempvec2 - @param[in,out] tempvec1 Temporary vector used for power iterations (FIXME: can become a reference when std::swap - can be used on ColorSpinorField) - @param[in,out] tempvec2 Temporary vector used for power iterations (FIXME: can become a reference when std::swap - can be used on ColorSpinorField) - @param[in] niter Total number of power iteration iterations - @param[in] normalize_freq Frequency with which intermediate vector gets normalized - @param[in] args Parameter pack of ColorSpinorFields used as temporary passed to Dirac - @return Norm of final power iteration result - */ - template - static double performPowerIterations(const DiracMatrix &diracm, const ColorSpinorField &start, - ColorSpinorField &tempvec1, ColorSpinorField &tempvec2, int niter, - int normalize_freq, Args &&...args); - - /** - @brief Generate a Krylov space in a given basis - @param[in] diracm Dirac matrix used to generate the Krylov space - @param[out] Ap dirac matrix times the Krylov basis vectors - @param[in,out] p Krylov basis vectors; assumes p[0] is in place - @param[in] n_krylov Size of krylov space - @param[in] basis Basis type - @param[in] m_map Slope mapping for Chebyshev basis; ignored for power basis - @param[in] b_map Intercept mapping for Chebyshev basis; ignored for power basis - @param[in] args Parameter pack of ColorSpinorFields used as temporary passed to Dirac - */ - template - static void computeCAKrylovSpace(const DiracMatrix &diracm, std::vector &Ap, - std::vector &p, int n_krylov, QudaCABasis basis, double m_map, - double b_map, Args &&...args); - /** * @brief Return flops * @return flops expended by this operator @@ -1182,7 +1148,7 @@ namespace quda { bool init; bool lambda_init; - QudaCABasis basis; + QudaPolynomialBasis basis; std::vector Q_AQandg; // Fused inner product matrix std::vector Q_AS; // inner product matrix @@ -1320,7 +1286,7 @@ namespace quda { bool init; bool lambda_init; // whether or not lambda_max has been initialized - QudaCABasis basis; // CA basis + QudaPolynomialBasis basis; // CA basis std::vector alpha; // Solution coefficient vectors @@ -1694,4 +1660,10 @@ namespace quda { */ bool is_ca_solver(QudaInverterType type); + /** + @brief Returns if a solver supports deflation or not + @return true if solver supports deflation, false otherwise + */ + bool is_deflatable_solver(QudaInverterType type); + } // namespace quda diff --git a/include/multigrid.h b/include/multigrid.h index 0f219023c2..1cef678352 100644 --- a/include/multigrid.h +++ b/include/multigrid.h @@ -286,7 +286,7 @@ namespace quda { SolverParam *param_coarse_solver; /** The coarse-grid representation of the null space vectors */ - std::vector *B_coarse; + std::vector B_coarse; /** Residual vector */ ColorSpinorField *r; @@ -449,7 +449,7 @@ namespace quda { /** @brief Load the null space vectors in from file - @param B Loaded null-space vectors (pre-allocated) + @param B Load null-space vectors to here */ void loadVectors(std::vector &B); @@ -460,22 +460,59 @@ namespace quda { void saveVectors(const std::vector &B) const; /** - @brief Generate the null-space vectors + @brief Initialize a set of vectors to random noise + @param B set of vectors + */ + void initializeRandomVectors(const std::vector &B) const; + + /** + @brief Wrapping function that manages logic for constructing near-null vectors + @param refresh whether or not we're only refreshing near null vectors + */ + void constructNearNulls(bool refresh = false); + + /** + @brief Generate the null-space vectors via inverse iterations @param B Generated null-space vectors - @param refresh Whether we refreshing pre-exising vectors or starting afresh + @param iterations if non-zero, override the max number of iterations */ - void generateNullVectors(std::vector &B, bool refresh=false); + void generateInverseIterations(std::vector &B, int iterations = 0); + + /** + @brief Generate the null-space vectors via a Chebyshev filter; this approach is + based on arXiv:2103.05034, P. Boyle and A. Yamaguchi. + @param B Generated null-space vectors + */ + void generateChebyshevFilter(std::vector &B); /** @brief Generate lowest eigenvectors + @param B Generated null-space vectors + @param post_restrict whether or not we're generating extra eigenvectors after restricting + some fine eigenvectors; if true we override the requested n_conv in the multigrid + eigenvalue struct */ - void generateEigenVectors(); + void generateEigenvectors(std::vector &B, bool post_restrict = false); + + /** + @brief Generate near-null vectors via restricting finer near-nulls, generating extras if need be + @param refresh Whether or not we're only refreshing extra near nulls + */ + void generateRestrictedVectors(bool refresh = false); /** @brief Build free-field null-space vectors - @param B Free-field null-space vectors + @param B Generated null-space vectors + */ + void generateFreeVectors(std::vector &B); + + /** + @brief Orthonormalize a vector of ColorSpinorField, erroring out if + the fields are not sufficiently linearly independent + @param vecs vector of ColorSpinorFields to normalize + @param count number of near-null vectors to orthonormalize, default all */ - void buildFreeVectors(std::vector &B); + void orthonormalizeVectors(const std::vector& vecs, int count = -1) const; /** @brief Return the total flops done on this and all coarser levels. @@ -495,6 +532,14 @@ namespace quda { return (param.level == 0 || kd_nearnull_gen); } + + /** + @brief Return if we're on the coarsest grid right now + */ + bool is_coarsest_grid() const { + return (param.level == (param.Nlevel - 1)); + } + }; /** diff --git a/include/quda.h b/include/quda.h index f8c234e7a8..804f93203f 100644 --- a/include/quda.h +++ b/include/quda.h @@ -322,7 +322,7 @@ extern "C" { double omega; /** Basis for CA algorithms */ - QudaCABasis ca_basis; + QudaPolynomialBasis ca_basis; /** Minimum eigenvalue for Chebyshev CA basis */ double ca_lambda_min; @@ -331,7 +331,7 @@ extern "C" { double ca_lambda_max; /** Basis for CA algorithms in a preconditioned solver */ - QudaCABasis ca_basis_precondition; + QudaPolynomialBasis ca_basis_precondition; /** Minimum eigenvalue for Chebyshev CA basis in a preconditioner solver */ double ca_lambda_min_precondition; @@ -640,8 +640,11 @@ extern "C" { /** Maximum number of iterations for refreshing the null-space vectors */ int setup_maxiter_refresh[QUDA_MAX_MG_LEVEL]; + /** Maximum number of iterations for inverse iteration refinement of null-space vectors */ + int setup_maxiter_inverse_iterations_refinement[QUDA_MAX_MG_LEVEL]; + /** Basis to use for CA solver setup */ - QudaCABasis setup_ca_basis[QUDA_MAX_MG_LEVEL]; + QudaPolynomialBasis setup_ca_basis[QUDA_MAX_MG_LEVEL]; /** Basis size for CA solver setup */ int setup_ca_basis_size[QUDA_MAX_MG_LEVEL]; @@ -652,8 +655,29 @@ extern "C" { /** Maximum eigenvalue for Chebyshev CA basis */ double setup_ca_lambda_max[QUDA_MAX_MG_LEVEL]; + /** Number of random starting vectors for Chebyshev filter null space generation */ + int filter_startup_vectors[QUDA_MAX_MG_LEVEL]; + + /** Number of iterations for initial Chebyshev filter */ + int filter_startup_iterations[QUDA_MAX_MG_LEVEL]; + + /** Frequency of rescales during initial filtering which helps avoid overflow */ + int filter_startup_rescale_frequency[QUDA_MAX_MG_LEVEL]; + + /** Number of iterations between null vectors generated from each starting vector */ + int filter_iterations_between_vectors[QUDA_MAX_MG_LEVEL]; + + /** Conservative estimate of largest eigenvalue of operator used for Chebyshev filter setup */ + double filter_lambda_min[QUDA_MAX_MG_LEVEL]; + + /** Lower bound of eigenvalues that are not enhanced by the initial Chebyshev filter */ + double filter_lambda_max[QUDA_MAX_MG_LEVEL]; + /** Null-space type to use in the setup phase */ - QudaSetupType setup_type; + QudaNullVectorSetupType setup_type[QUDA_MAX_MG_LEVEL]; + + /** Null-space type to use to "fill in" extra vectors after restricting some */ + QudaNullVectorSetupType setup_restrict_remaining_type[QUDA_MAX_MG_LEVEL]; /** Pre orthonormalize vectors in the setup phase */ QudaBoolean pre_orthonormalize; @@ -671,7 +695,7 @@ extern "C" { int coarse_solver_maxiter[QUDA_MAX_MG_LEVEL]; /** Basis to use for CA coarse solvers */ - QudaCABasis coarse_solver_ca_basis[QUDA_MAX_MG_LEVEL]; + QudaPolynomialBasis coarse_solver_ca_basis[QUDA_MAX_MG_LEVEL]; /** Basis size for CA coarse solvers */ int coarse_solver_ca_basis_size[QUDA_MAX_MG_LEVEL]; @@ -695,7 +719,7 @@ extern "C" { int nu_post[QUDA_MAX_MG_LEVEL]; /** Basis to use for CA smoother solvers */ - QudaCABasis smoother_solver_ca_basis[QUDA_MAX_MG_LEVEL]; + QudaPolynomialBasis smoother_solver_ca_basis[QUDA_MAX_MG_LEVEL]; /** Minimum eigenvalue for Chebyshev CA smoother basis */ double smoother_solver_ca_lambda_min[QUDA_MAX_MG_LEVEL]; @@ -716,7 +740,7 @@ extern "C" { int smoother_schwarz_cycle[QUDA_MAX_MG_LEVEL]; /** The type of residual to send to the next coarse grid, and thus the - type of solution to receive back from this coarse grid */ + type of solution to receive back from this coarse grid */ QudaSolutionType coarse_grid_solution_type[QUDA_MAX_MG_LEVEL]; /** The type of smoother solve to do on each grid (e/o preconditioning or not)*/ @@ -742,12 +766,6 @@ extern "C" { memory */ QudaBoolean setup_minimize_memory; - /** Whether to compute the null vectors or reload them */ - QudaComputeNullVector compute_null_vector; - - /** Whether to generate on all levels or just on level 0 */ - QudaBoolean generate_all_levels; - /** Whether to run the verification checks once set up is complete */ QudaBoolean run_verify; diff --git a/lib/check_params.h b/lib/check_params.h index ffd7b4f3af..deb1dc5089 100644 --- a/lib/check_params.h +++ b/lib/check_params.h @@ -760,12 +760,6 @@ void printQudaMultigridParam(QudaMultigridParam *param) { int n_level = param->n_level; #endif -#ifdef INIT_PARAM - P(setup_type, QUDA_NULL_VECTOR_SETUP); -#else - P(setup_type, QUDA_INVALID_SETUP_TYPE); -#endif - #ifdef INIT_PARAM P(pre_orthonormalize, QUDA_BOOLEAN_FALSE); #else @@ -779,6 +773,14 @@ void printQudaMultigridParam(QudaMultigridParam *param) { #endif for (int i=0; igenerate_all_levels == QUDA_BOOLEAN_FALSE && param->compute_null_vector == QUDA_COMPUTE_NULL_VECTOR_YES) { - for (int i=1; in_vec[0] != param->n_vec[i]) - errorQuda("n_vec %d != %d must be equal on all levels if generate_all_levels == false", - param->n_vec[0], param->n_vec[i]); - } -#endif - P(run_verify, QUDA_BOOLEAN_INVALID); #ifdef INIT_PARAM diff --git a/lib/eigensolve_quda.cpp b/lib/eigensolve_quda.cpp index 9dd059965e..c1adf9b231 100644 --- a/lib/eigensolve_quda.cpp +++ b/lib/eigensolve_quda.cpp @@ -354,8 +354,8 @@ namespace quda // C_1 is the current 'out' vector. // Clone 'in' to two temporary vectors. - auto tmp1 = std::make_unique(in); - auto tmp2 = std::make_unique(out); + ColorSpinorField tmp_1(in); + ColorSpinorField tmp_2(out); // Using Chebyshev polynomial recursion relation, // C_{m+1}(x) = 2*x*C_{m} - C_{m-1} @@ -372,14 +372,14 @@ namespace quda // FIXME - we could introduce a fused matVec + blas kernel here, eliminating one temporary // mat*C_{m}(x) - matVec(mat, out, *tmp2); + matVec(mat, out, tmp_2); - blas::axpbypczw(d3, *tmp1, d2, *tmp2, d1, out, *tmp1); - std::swap(tmp1, tmp2); + blas::axpbypczw(d3, tmp_1, d2, tmp_2, d1, out, tmp_1); + std::swap(tmp_1, tmp_2); sigma_old = sigma; } - blas::copy(out, *tmp2); + blas::copy(out, tmp_2); // Save Chebyshev tuning saveTuneCache(); diff --git a/lib/inv_bicgstab_quda.cpp b/lib/inv_bicgstab_quda.cpp index 7374241575..ab8874c2f4 100644 --- a/lib/inv_bicgstab_quda.cpp +++ b/lib/inv_bicgstab_quda.cpp @@ -92,7 +92,7 @@ namespace quda { // Check to see that we're not trying to invert on a zero-field source if (b2 == 0) { - if (param.compute_null_vector == QUDA_COMPUTE_NULL_VECTOR_NO) { + if (!param.compute_null_vector) { warningQuda("inverting on zero-field source"); x = b; param.true_res = 0.0; @@ -110,7 +110,7 @@ namespace quda { if (param.precision_sloppy == x.Precision()) { r_sloppy = &r; - if(param.compute_null_vector == QUDA_COMPUTE_NULL_VECTOR_NO) + if(!param.compute_null_vector) { r_0 = &b; } @@ -333,7 +333,7 @@ namespace quda { delete r_0; delete r_sloppy; } - else if(param.compute_null_vector == QUDA_COMPUTE_NULL_VECTOR_YES) + else if (param.compute_null_vector) { delete r_0; } diff --git a/lib/inv_bicgstabl_quda.cpp b/lib/inv_bicgstabl_quda.cpp index a134efdf75..c664bf8082 100644 --- a/lib/inv_bicgstabl_quda.cpp +++ b/lib/inv_bicgstabl_quda.cpp @@ -549,7 +549,7 @@ namespace quda { // Check to see that we're not trying to invert on a zero-field source if (b2 == 0) { - if (param.compute_null_vector == QUDA_COMPUTE_NULL_VECTOR_NO) { + if (!param.compute_null_vector) { warningQuda("inverting on zero-field source"); x = b; param.true_res = 0.0; @@ -567,7 +567,7 @@ namespace quda { // There probably be bugs and headaches hiding here. if (param.precision_sloppy == x.Precision()) { r[0] = &r_full; // r[0] \equiv r_sloppy points to the same memory location as r. - if (param.compute_null_vector == QUDA_COMPUTE_NULL_VECTOR_NO) + if (!param.compute_null_vector) { r0p = &b; // r0, b point to the same vector in memory. } diff --git a/lib/inv_ca_cg.cpp b/lib/inv_ca_cg.cpp index 6c98a3f418..8dab247baf 100644 --- a/lib/inv_ca_cg.cpp +++ b/lib/inv_ca_cg.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include /** @file inv_ca_cg.cpp @@ -488,7 +488,14 @@ namespace quda // Perform 100 power iterations, normalizing every 10 mat-vecs, using r as an initial seed // and Q[0]/AQ[0] as temporaries for the power iterations. tmp_sloppy/tmp2_sloppy get passed in as temporaries // for matSloppy. - lambda_max = 1.1 * Solver::performPowerIterations(matSloppy, r, Q[0], AQ[0], 100, 10, tmp_sloppy, tmp2_sloppy); + PolynomialBasisParams basis_params; + basis_params.basis = QUDA_POWER_BASIS; + basis_params.n_order = 100; + basis_params.normalize_freq = 10; + basis_params.tmp_vectors.emplace_back(std::ref(Q[0])); + basis_params.tmp_vectors.emplace_back(std::ref(AQ[1])); + lambda_max = 1.1 * performPowerIterations(matSloppy, r, basis_params, tmp_sloppy, tmp2_sloppy); + logQuda(QUDA_SUMMARIZE, "CA-CG Approximate lambda max = 1.1 x %e\n", lambda_max / 1.1); lambda_init = true; @@ -501,7 +508,7 @@ namespace quda // Check to see that we're not trying to invert on a zero-field source if (b2 == 0) { - if (param.compute_null_vector == QUDA_COMPUTE_NULL_VECTOR_NO) { + if (!param.compute_null_vector) { warningQuda("inverting on zero-field source\n"); x = b; param.true_res = 0.0; @@ -544,9 +551,13 @@ namespace quda double maxrr = rNorm; // The maximum residual norm since the last reliable update. double maxr_deflate = rNorm; // The maximum residual since the last deflation - // Factors which map linear operator onto [-1,1] - double m_map = 2. / (lambda_max - lambda_min); - double b_map = -(lambda_max + lambda_min) / (lambda_max - lambda_min); + // Parameters for the basis polynomial + PolynomialBasisParams ca_params; + ca_params.basis = basis; + ca_params.n_order = n_krylov; + ca_params.normalize_freq = 0; + ca_params.m_map = PolynomialBasisParams::compute_m_map(lambda_min, lambda_max); + ca_params.b_map = PolynomialBasisParams::compute_b_map(lambda_min, lambda_max); blas::copy(S[0], r); // no op if uni-precision @@ -554,7 +565,7 @@ namespace quda while (!convergence(r2, heavy_quark_res, stop, param.tol_hq) && total_iter < param.maxiter) { // build up a space of size n_krylov, assumes S[0] is in place - computeCAKrylovSpace(matSloppy, AS, S, n_krylov, basis, m_map, b_map, tmp_sloppy, tmp2_sloppy); + computeCAKrylovSpace(matSloppy, AS, S, ca_params, tmp_sloppy, tmp2_sloppy); // we can greatly simplify the workflow for fixed iterations if (!fixed_iteration) { diff --git a/lib/inv_ca_gcr.cpp b/lib/inv_ca_gcr.cpp index d4e357b0ec..8ee5b5e79a 100644 --- a/lib/inv_ca_gcr.cpp +++ b/lib/inv_ca_gcr.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include namespace quda { @@ -229,7 +229,14 @@ namespace quda // Perform 100 power iterations, normalizing every 10 mat-vecs, using r_ as an initial seed // and q[0]/q[1] as temporaries for the power iterations. tmpSloppy get passed in a temporary // for matSloppy. Technically illegal if n_krylov == 1, but in that case lambda_max isn't used anyway. - lambda_max = 1.1 * Solver::performPowerIterations(matSloppy, r, q[0], q[1], 100, 10, tmp_sloppy); + PolynomialBasisParams basis_params; + basis_params.basis = QUDA_POWER_BASIS; + basis_params.n_order = 100; + basis_params.normalize_freq = 10; + basis_params.tmp_vectors.emplace_back(std::ref(q[0])); + basis_params.tmp_vectors.emplace_back(std::ref(q[1])); + lambda_max = 1.1 * performPowerIterations(matSloppy, r, basis_params, tmp_sloppy); + logQuda(QUDA_SUMMARIZE, "CA-GCR Approximate lambda max = 1.1 x %e\n", lambda_max / 1.1); lambda_init = true; @@ -240,13 +247,17 @@ namespace quda } } - // Factors which map linear operator onto [-1,1] - double m_map = 2. / (lambda_max - lambda_min); - double b_map = -(lambda_max + lambda_min) / (lambda_max - lambda_min); + // Parameters for the basis polynomial + PolynomialBasisParams ca_params; + ca_params.basis = basis; + ca_params.n_order = n_krylov; + ca_params.normalize_freq = 0; + ca_params.m_map = PolynomialBasisParams::compute_m_map(lambda_min, lambda_max); + ca_params.b_map = PolynomialBasisParams::compute_b_map(lambda_min, lambda_max); // Check to see that we're not trying to invert on a zero-field source if (b2 == 0) { - if (param.compute_null_vector == QUDA_COMPUTE_NULL_VECTOR_NO) { + if (!param.compute_null_vector) { warningQuda("inverting on zero-field source\n"); x = b; param.true_res = 0.0; @@ -290,7 +301,7 @@ namespace quda while (!convergence(r2, heavy_quark_res, stop, param.tol_hq) && total_iter < param.maxiter) { // build up a space of size n_krylov - computeCAKrylovSpace(matSloppy, q, p, n_krylov, basis, m_map, b_map, tmp_sloppy); + computeCAKrylovSpace(matSloppy, q, p, ca_params, tmp_sloppy); solve(alpha, q, p[0]); diff --git a/lib/inv_cg3_quda.cpp b/lib/inv_cg3_quda.cpp index 567a9c639e..ec79421563 100644 --- a/lib/inv_cg3_quda.cpp +++ b/lib/inv_cg3_quda.cpp @@ -204,7 +204,7 @@ namespace quda { // Check to see that we're not trying to invert on a zero-field source double b2 = blas::norm2(b); if(b2 == 0 && - (param.compute_null_vector == QUDA_COMPUTE_NULL_VECTOR_NO || param.use_init_guess == QUDA_USE_INIT_GUESS_NO)){ + (!param.compute_null_vector || param.use_init_guess == QUDA_USE_INIT_GUESS_NO)){ profile.TPSTOP(QUDA_PROFILE_INIT); printfQuda("Warning: inverting on zero-field source\n"); x = b; diff --git a/lib/inv_cg_quda.cpp b/lib/inv_cg_quda.cpp index 8883e35b1b..19627367a4 100644 --- a/lib/inv_cg_quda.cpp +++ b/lib/inv_cg_quda.cpp @@ -252,7 +252,7 @@ namespace quda { double b2 = blas::norm2(b); // Check to see that we're not trying to invert on a zero-field source - if (b2 == 0 && param.compute_null_vector == QUDA_COMPUTE_NULL_VECTOR_NO) { + if (b2 == 0 && !param.compute_null_vector) { if (!param.is_preconditioner) profile.TPSTOP(QUDA_PROFILE_INIT); printfQuda("Warning: inverting on zero-field source\n"); x = b; @@ -567,7 +567,7 @@ namespace quda { } // if L2 broke down already we turn off reliable updates and restart the CG - if (ru.reliable_heavy_quark_break(L2breakdown, heavy_quark_res, heavy_quark_res_old, heavy_quark_restart)) { + if (use_heavy_quark_res && ru.reliable_heavy_quark_break(L2breakdown, heavy_quark_res, heavy_quark_res_old, heavy_quark_restart)) { break; } diff --git a/lib/inv_gcr_quda.cpp b/lib/inv_gcr_quda.cpp index 22546d17ee..72baaaf277 100644 --- a/lib/inv_gcr_quda.cpp +++ b/lib/inv_gcr_quda.cpp @@ -327,7 +327,7 @@ namespace quda { // Check to see that we're not trying to invert on a zero-field source if (b2 == 0) { - if (param.compute_null_vector == QUDA_COMPUTE_NULL_VECTOR_NO) { + if (!param.compute_null_vector) { profile.TPSTOP(QUDA_PROFILE_INIT); warningQuda("inverting on zero-field source\n"); x = b; diff --git a/lib/inv_pcg_quda.cpp b/lib/inv_pcg_quda.cpp index d2aa935628..a507cacd70 100644 --- a/lib/inv_pcg_quda.cpp +++ b/lib/inv_pcg_quda.cpp @@ -108,7 +108,7 @@ namespace quda double b2 = blas::norm2(b); // Check to see that we're not trying to invert on a zero-field source - if (b2 == 0 && param.compute_null_vector == QUDA_COMPUTE_NULL_VECTOR_NO) { + if (b2 == 0 && !param.compute_null_vector) { profile.TPSTOP(QUDA_PROFILE_INIT); printfQuda("Warning: inverting on zero-field source\n"); x = b; diff --git a/lib/milc_interface.cpp b/lib/milc_interface.cpp index 5a893dcebf..37142f9cb0 100644 --- a/lib/milc_interface.cpp +++ b/lib/milc_interface.cpp @@ -1965,6 +1965,10 @@ void milcSetMultigridParam(milcMultigridPack *mg_pack, QudaPrecision host_precis mg_param.setup_tol[i] = input_struct.setup_tol[i]; mg_param.setup_maxiter[i] = input_struct.setup_maxiter[i]; + // change this to refresh fields when mass or links change + mg_param.setup_maxiter_refresh[i] = 0; // setup_maxiter_refresh[i]; + mg_param.setup_maxiter_inverse_iterations_refinement[i] = 0; // setup_maxiter_inverse_iterations_refinement[i]; + // Basis to use for CA solver setup --- heuristic for CA-GCR is empirical if (is_ca_solver(input_struct.setup_inv[i])) { if (input_struct.setup_inv[i] == QUDA_CA_GCR_INVERTER && input_struct.setup_ca_basis_size[i] <= 8) @@ -1975,6 +1979,12 @@ void milcSetMultigridParam(milcMultigridPack *mg_pack, QudaPrecision host_precis mg_param.setup_ca_basis[i] = QUDA_POWER_BASIS; // setup_ca_basis[i]; } + // Setup type to use (inverse iterations, chebyshev filter, eigenvectors, restriction, free field) + mg_param.setup_type[i] = QUDA_SETUP_NULL_VECTOR_INVERSE_ITERATIONS; // setup_type[i]; + + // Setup type to use to generate remaining near-null vectors when some are restricted + mg_param.setup_restrict_remaining_type[i] = QUDA_SETUP_NULL_VECTOR_INVERSE_ITERATIONS; // setup_restrict_remaining_type[i]; + // Basis size for CA solver setup mg_param.setup_ca_basis_size[i] = input_struct.setup_ca_basis_size[i]; @@ -1983,8 +1993,7 @@ void milcSetMultigridParam(milcMultigridPack *mg_pack, QudaPrecision host_precis mg_param.setup_ca_lambda_max[i] = -1.0; // use power iterations // setup_ca_lambda_max[i]; mg_param.spin_block_size[i] = 1; - // change this to refresh fields when mass or links change - mg_param.setup_maxiter_refresh[i] = 0; // setup_maxiter_refresh[i]; + mg_param.n_vec[i] = (i == 0) ? ((input_struct.optimized_kd == QUDA_TRANSFER_COARSE_KD) ? 24 : 3) : input_struct.nvec[i]; mg_param.n_block_ortho[i] = 2; // n_block_ortho[i]; // number of times to Gram-Schmidt @@ -2157,15 +2166,9 @@ void milcSetMultigridParam(milcMultigridPack *mg_pack, QudaPrecision host_precis || input_struct.optimized_kd == QUDA_TRANSFER_OPTIMIZED_KD_DROP_LONG) mg_param.spin_block_size[1] = 0; - mg_param.setup_type = QUDA_NULL_VECTOR_SETUP; // setup_type; mg_param.pre_orthonormalize = QUDA_BOOLEAN_FALSE; // pre_orthonormalize ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; mg_param.post_orthonormalize = QUDA_BOOLEAN_TRUE; // post_orthonormalize ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - mg_param.compute_null_vector - = QUDA_COMPUTE_NULL_VECTOR_YES; // generate_nullspace ? QUDA_COMPUTE_NULL_VECTOR_YES : QUDA_COMPUTE_NULL_VECTOR_NO; - - mg_param.generate_all_levels = QUDA_BOOLEAN_TRUE; // generate_all_levels ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - mg_param.run_verify = input_struct.verify_results ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; mg_param.run_low_mode_check = QUDA_BOOLEAN_FALSE; // low_mode_check ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; mg_param.run_oblique_proj_check = QUDA_BOOLEAN_FALSE; // oblique_proj_check ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; diff --git a/lib/multigrid.cpp b/lib/multigrid.cpp index 028111ee6d..cba32728ca 100644 --- a/lib/multigrid.cpp +++ b/lib/multigrid.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include // for building the KD inverse op #include @@ -30,6 +32,7 @@ namespace quda param_presmooth(nullptr), param_postsmooth(nullptr), param_coarse_solver(nullptr), + B_coarse(), r(nullptr), b_tilde(nullptr), r_coarse(nullptr), @@ -84,37 +87,25 @@ namespace quda rng = new RNG(*param.B[0], 1234); - if (param.transfer_type == QUDA_TRANSFER_AGGREGATE) { - if (param.level < param.Nlevel - 1) { - if (param.mg_global.compute_null_vector == QUDA_COMPUTE_NULL_VECTOR_YES) { - if (param.mg_global.generate_all_levels == QUDA_BOOLEAN_TRUE || param.level == 0) { - - // Initializing to random vectors - for (int i = 0; i < (int)param.B.size(); i++) { - spinorNoise(*r, *rng, QUDA_NOISE_UNIFORM); - *param.B[i] = *r; - } - } - if (param.mg_global.num_setup_iter[param.level] > 0) { - if (param.mg_global.vec_load[param.level] == QUDA_BOOLEAN_TRUE - && strcmp(param.mg_global.vec_infile[param.level], "") - != 0) { // only load if infile is defined and not computing - loadVectors(param.B); - } else if (param.mg_global.use_eig_solver[param.level]) { - generateEigenVectors(); // Run the eigensolver - } else { - generateNullVectors(param.B); - } - } - } else if (strcmp(param.mg_global.vec_infile[param.level], "") - != 0) { // only load if infile is defined and not computing - if (param.mg_global.num_setup_iter[param.level] > 0) generateNullVectors(param.B); - } else if (param.mg_global.vec_load[param.level] == QUDA_BOOLEAN_TRUE) { // only conditional load of null vectors - loadVectors(param.B); - } else { // generate free field vectors - buildFreeVectors(param.B); - } - } + if (!is_coarsest_grid()) { + // allocate coarse temporary vectors + tmp_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(), + param.mg_global.location[param.level + 1]); + tmp2_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(), + param.mg_global.location[param.level + 1]); + r_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(), + param.mg_global.location[param.level + 1]); + x_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(), + param.mg_global.location[param.level + 1]); + } + + if (param.transfer_type == QUDA_TRANSFER_AGGREGATE && !is_coarsest_grid()) { + + constructNearNulls(); + + // only save if outfile is defined + if (strcmp(param.mg_global.vec_outfile[param.level], "") != 0) + saveVectors(param.B); } // in case of iterative setup with MG the coarse level may be already built @@ -126,7 +117,7 @@ namespace quda void MG::reset(bool refresh) { pushLevel(param.level); - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("%s level %d\n", transfer ? "Resetting" : "Creating", param.level); + logQuda(QUDA_VERBOSE, "%s level %d\n", transfer ? "Resetting" : "Creating", param.level); destroySmoother(); destroyCoarseSolver(); @@ -140,13 +131,14 @@ namespace quda // if we aren't doing a staggered KD solve if (param.level != 0 || param.transfer_type == QUDA_TRANSFER_AGGREGATE) { // Refresh the null-space vectors if we need to - if (refresh && param.level < param.Nlevel - 1) { - if (param.mg_global.setup_maxiter_refresh[param.level]) generateNullVectors(param.B, refresh); + // FIXME: can we meaningfully refresh Chebyshev filter null vecs? + if (refresh && !is_coarsest_grid()) { + if (param.mg_global.setup_maxiter_refresh[param.level]) constructNearNulls(refresh); } } // if not on the coarsest level, update next - if (param.level < param.Nlevel-1) { + if (!is_coarsest_grid()) { if (transfer) { // restoring FULL parity in Transfer changed at the end of this procedure @@ -163,43 +155,14 @@ namespace quda param.mg_global.transfer_type[param.level], profile); for (int i=0; iCreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(), - param.mg_global.location[param.level + 1]); - - // create coarse temporary vector if not already created in verify() - if (!tmp2_coarse) - tmp2_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(), - param.mg_global.location[param.level + 1]); - - // create coarse residual vector if not already created in verify() - if (!r_coarse) - r_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(), - param.mg_global.location[param.level + 1]); - - // create coarse solution vector if not already created in verify() - if (!x_coarse) - x_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(), - param.mg_global.location[param.level + 1]); - - B_coarse = new std::vector(); - int nVec_coarse = std::max(param.Nvec, param.mg_global.n_vec[param.level + 1]); - B_coarse->resize(nVec_coarse); + int n_vec_coarse = param.mg_global.n_vec[param.level + 1]; + B_coarse.resize(n_vec_coarse); // only have single precision B vectors on the coarse grid QudaPrecision B_coarse_precision = std::max(param.mg_global.precision_null[param.level+1], QUDA_SINGLE_PRECISION); - for (int i=0; iCreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, B_coarse_precision, param.mg_global.setup_location[param.level+1]); - - // if we're not generating on all levels then we need to propagate the vectors down - if ((param.level != 0 || param.Nlevel - 1) && param.mg_global.generate_all_levels == QUDA_BOOLEAN_FALSE) { - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Restricting null space vectors\n"); - for (int i=0; iR(*(*B_coarse)[i], *(param.B[i])); - } - } + for (int i = 0; i < n_vec_coarse; i++) + B_coarse[i] = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, B_coarse_precision, param.mg_global.setup_location[param.level+1]); + if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Transfer operator done\n"); } @@ -213,7 +176,7 @@ namespace quda // delay allocating smoother until after coarse-links have been created createSmoother(); - if (param.level < param.Nlevel-1) { + if (!is_coarsest_grid()) { // If enabled, verify the coarse links and fine solvers were correctly built if (param.mg_global.run_verify) verify(); @@ -228,7 +191,7 @@ namespace quda coarse->reset(refresh); } else { // create the next multigrid level - param_coarse = new MGParam(param, *B_coarse, matCoarseResidual, matCoarseSmoother, matCoarseSmootherSloppy, + param_coarse = new MGParam(param, B_coarse, matCoarseResidual, matCoarseSmoother, matCoarseSmootherSloppy, param.level + 1); param_coarse->fine = this; param_coarse->delta = 1e-20; @@ -330,7 +293,7 @@ namespace quda pushLevel(param.level); // create the smoother for this level - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Creating smoother\n"); + logQuda(QUDA_VERBOSE, "Creating smoother\n"); destroySmoother(); param_presmooth = new SolverParam(param); @@ -348,7 +311,7 @@ namespace quda param_presmooth->inv_type_precondition = QUDA_INVALID_INVERTER; param_presmooth->residual_type = (param_presmooth->inv_type == QUDA_MR_INVERTER) ? QUDA_INVALID_RESIDUAL : QUDA_L2_RELATIVE_RESIDUAL; param_presmooth->Nsteps = param.mg_global.smoother_schwarz_cycle[param.level]; - param_presmooth->maxiter = (param.level < param.Nlevel-1) ? param.nu_pre : param.nu_pre + param.nu_post; + param_presmooth->maxiter = (is_coarsest_grid()) ? (param.nu_pre + param.nu_post) : param.nu_pre; param_presmooth->Nkrylov = param_presmooth->maxiter; param_presmooth->pipeline = param_presmooth->maxiter; @@ -368,12 +331,12 @@ namespace quda // inner solver should recompute the true residual after each cycle if using Schwarz preconditioning param_presmooth->compute_true_res = (param_presmooth->schwarz_type != QUDA_INVALID_SCHWARZ) ? true : false; - presmoother = ((param.level < param.Nlevel - 1 || param_presmooth->schwarz_type != QUDA_INVALID_SCHWARZ) + presmoother = ((!is_coarsest_grid() || param_presmooth->schwarz_type != QUDA_INVALID_SCHWARZ) && param_presmooth->inv_type != QUDA_INVALID_INVERTER && param_presmooth->maxiter > 0) ? Solver::create(*param_presmooth, *param.matSmooth, *param.matSmoothSloppy, *param.matSmoothSloppy, *param.matSmoothSloppy, profile) : nullptr; - if (param.level < param.Nlevel - 1) { // Create the post smoother + if (!is_coarsest_grid()) { // Create the post smoother param_postsmooth = new SolverParam(*param_presmooth); param_postsmooth->return_residual = false; // post smoother does not need to return the residual vector param_postsmooth->use_init_guess = QUDA_USE_INIT_GUESS_YES; @@ -663,11 +626,8 @@ namespace quda param_coarse_solver->use_init_guess = QUDA_USE_INIT_GUESS_YES; } - // Deflation on the coarse is supported for 6 solvers only - if (param_coarse_solver->inv_type != QUDA_CA_CGNR_INVERTER && param_coarse_solver->inv_type != QUDA_CGNR_INVERTER - && param_coarse_solver->inv_type != QUDA_CA_CGNE_INVERTER && param_coarse_solver->inv_type != QUDA_CGNE_INVERTER - && param_coarse_solver->inv_type != QUDA_CA_GCR_INVERTER && param_coarse_solver->inv_type != QUDA_GCR_INVERTER - && param_coarse_solver->inv_type != QUDA_BICGSTABL_INVERTER) { + // Deflation is not supported for all solvers + if (!is_deflatable_solver(param_coarse_solver->inv_type)) { errorQuda("Coarse grid deflation not supported with coarse solver %d", param_coarse_solver->inv_type); } @@ -749,8 +709,7 @@ namespace quda // vectors stored in the parent MG instead coarse_solver_inner.setDeflateCompute(false); coarse_solver_inner.setRecomputeEvals(true); - if (getVerbosity() >= QUDA_VERBOSE) - printfQuda("Transferring deflation space size %d to coarse solver\n", defl_size); + logQuda(QUDA_VERBOSE,"Transferring deflation space size %d to coarse solver\n", defl_size); // Create space in coarse solver to hold deflation space, destroy space in MG. coarse_solver_inner.injectDeflationSpace(evecs); } @@ -764,11 +723,11 @@ namespace quda param_coarse_solver->maxiter = param.mg_global.coarse_solver_maxiter[param.level + 1]; } - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Assigned coarse solver to preconditioned GCR solver\n"); + logQuda(QUDA_VERBOSE, "Assigned coarse solver to preconditioned GCR solver\n"); } else { errorQuda("Multigrid cycle type %d not supported", param.cycle_type); } - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Coarse solver wrapper done\n"); + logQuda(QUDA_VERBOSE, "Coarse solver wrapper done\n"); popLevel(); } @@ -780,16 +739,13 @@ namespace quda if (param.level < param.Nlevel - 1) { if (coarse) delete coarse; if (param.level == param.Nlevel-1 || param.cycle_type == QUDA_MG_CYCLE_RECURSIVE) { - if (coarse_solver) delete coarse_solver; - if (param_coarse_solver) delete param_coarse_solver; + if (coarse_solver) delete coarse_solver; + if (param_coarse_solver) delete param_coarse_solver; } - if (B_coarse) { - int nVec_coarse = std::max(param.Nvec, param.mg_global.n_vec[param.level + 1]); - for (int i = 0; i < nVec_coarse; i++) - if ((*B_coarse)[i]) delete (*B_coarse)[i]; - delete B_coarse; - } + int n_vec_coarse = param.mg_global.n_vec[param.level + 1]; + for (int i = 0; i < n_vec_coarse; i++) + if (B_coarse[i]) delete B_coarse[i]; if (transfer) delete transfer; if (matCoarseSmootherSloppy) delete matCoarseSmootherSloppy; if (diracCoarseSmootherSloppy) delete diracCoarseSmootherSloppy; @@ -824,6 +780,63 @@ namespace quda popLevel(); } + void MG::orthonormalizeVectors(const std::vector& vecs, int count) const { + + int num_vec = (count == -1) ? static_cast(vecs.size()) : count; + + if (num_vec > (int)vecs.size()) + errorQuda("Trying to orthonormalize %d vectors which is larger than std::vector size %ld", num_vec, vecs.size()); + + typedef Matrix matrix; + + // Create std::vector of references to ColorSpinorFields we're orthonormalizing + std::vector ortho_vecs; + for (int i = 0; i < num_vec; i++) + ortho_vecs.emplace_back(std::ref(*vecs[i])); + + // Prepare to do the Cholesky decomposition for a thin-QR + std::vector Vdagv_(num_vec * num_vec); + + // outstanding bugfix + //hDotProduct(Vdagv_, ortho_vecs, ortho_vecs); + cDotProduct(Vdagv_, ortho_vecs, ortho_vecs); + + // perform Cholesky + matrix Vdagv(num_vec, num_vec); + for (int i = 0; i < num_vec; i++) + for (int j = 0; j < num_vec; j++) + Vdagv(i, j) = Vdagv_[i * num_vec + j]; + matrix sigma = Vdagv.llt().matrixL(); + + // perform tall-skinny QR to get Q + matrix R_inv = sigma.adjoint().inverse(); + +#if 0 + // Given a working cax_U... + // Tracked at https://github.com/lattice/quda/issues/1286 + std::vector R_inv_(num_vec * num_vec); + for (int i = 0; i < num_vec; i++) + for (int j = 0; j < num_vec; j++) + R_inv_[i * num_vec + j] = R_inv(i, j); + cax_U(R_inv_, ortho_vecs); + +#else + ColorSpinorParam csParam(*vecs[0]); + csParam.create = QUDA_NULL_FIELD_CREATE; + ColorSpinorField tmp_field(csParam); + for (int i = num_vec - 1; i >= 0; i--) { + blas::zero(tmp_field); + std::vector coeff(i + 1); + for (int j = 0; j < i + 1; j++) + coeff[j] = R_inv(j, i); + std::vector vecs_subset(ortho_vecs.begin(), ortho_vecs.begin() + i + 1); + caxpy(coeff, vecs_subset, tmp_field); + blas::copy(ortho_vecs[i], tmp_field); + } +#endif + + } + // FIXME need to make this more robust (implement Solver::flops() for all solvers) double MG::flops() const { double flops = 0; @@ -889,8 +902,7 @@ namespace quda // No need to check (projector) v_k for staggered case if (param.transfer_type == QUDA_TRANSFER_AGGREGATE) { - if (getVerbosity() >= QUDA_SUMMARIZE) - printfQuda("Checking 0 = (1 - P P^\\dagger) v_k for %d vectors\n", param.Nvec); + logQuda(QUDA_SUMMARIZE, "Checking 0 = (1 - P P^\\dagger) v_k for %d vectors\n", param.Nvec); for (int i = 0; i < param.Nvec; i++) { // as well as copying to the correct location this also changes basis if necessary @@ -956,28 +968,6 @@ namespace quda } #endif - // We need to create temporary coarse vectors here for various verifications - // Otherwise these get created in the coarse `reset()` routine later - - if (!tmp_coarse) - tmp_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(), - param.mg_global.location[param.level + 1]); - - // create coarse temporary vector if not already created in verify() - if (!tmp2_coarse) - tmp2_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(), - param.mg_global.location[param.level + 1]); - - // create coarse residual vector if not already created in verify() - if (!r_coarse) - r_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(), - param.mg_global.location[param.level + 1]); - - // create coarse solution vector if not already created in verify() - if (!x_coarse) - x_coarse = param.B[0]->CreateCoarse(param.geoBlockSize, param.spinBlockSize, param.Nvec, r->Precision(), - param.mg_global.location[param.level + 1]); - if (getVerbosity() >= QUDA_SUMMARIZE) printfQuda("Checking 0 = (1 - P^\\dagger P) eta_c\n"); spinorNoise(*x_coarse, *rng, QUDA_NOISE_UNIFORM); @@ -1177,7 +1167,7 @@ namespace quda // Reuse the space for the Null vectors. By this point, // the coarse grid has already been constructed. - generateEigenVectors(); + generateEigenvectors(param.B); for (int i = 0; i < param.Nvec; i++) { @@ -1231,7 +1221,7 @@ namespace quda { pushOutputPrefix(prefix); - if (param.level < param.Nlevel - 1) { // set parity for the solver in the transfer operator + if (!is_coarsest_grid()) { // set parity for the solver in the transfer operator QudaSiteSubset site_subset = param.coarse_grid_solution_type == QUDA_MATPC_SOLUTION ? QUDA_PARITY_SITE_SUBSET : QUDA_FULL_SITE_SUBSET; QudaMatPCType matpc_type = param.mg_global.invert_param->matpc_type; @@ -1257,7 +1247,7 @@ namespace quda if ( debug ) printfQuda("entering V-cycle with x2=%e, r2=%e\n", norm2(x), norm2(b)); - if (param.level < param.Nlevel-1) { + if (!is_coarsest_grid()) { //transfer->setTransferGPU(false); // use this to force location of transfer (need to check if still works for multi-level) // do the pre smoothing @@ -1406,22 +1396,128 @@ namespace quda void MG::dumpNullVectors() const { - if (param.transfer_type != QUDA_TRANSFER_AGGREGATE) { - warningQuda("Cannot dump near-null vectors for top level of staggered MG solve."); + if (!is_coarsest_grid()) { + if (param.transfer_type != QUDA_TRANSFER_AGGREGATE) { + warningQuda("Cannot dump near-null vectors for top level of staggered MG solve."); + } else { + saveVectors(param.B); + } + coarse->dumpNullVectors(); + } + } + + void MG::initializeRandomVectors(const std::vector &B) const { + for (auto b : B) { + spinorNoise(*r, *rng, QUDA_NOISE_UNIFORM); + *b = *r; + } + } + + void MG::constructNearNulls(bool refresh) { + + pushLevel(param.level); + + if (is_coarsest_grid()) { + logQuda(QUDA_VERBOSE, "On coarsest level, not generating new near nulls..."); + popLevel(); + return; + } + + logQuda(QUDA_VERBOSE, "Running vectors setup on level %d\n", param.level); + + auto setup_type = param.mg_global.setup_type[param.level]; + + // if a near-null vector input file is specified, always load it and + // ignore other setup information, UNLESS we're refreshing + if (strcmp(param.mg_global.vec_infile[param.level], "") != 0 && !refresh) { + loadVectors(param.B); } else { - saveVectors(param.B); + + // multiple setup iterations only matters for test vector setup + if (setup_type != QUDA_SETUP_NULL_VECTOR_TEST_VECTORS && + param.mg_global.num_setup_iter[param.level] > 1) + errorQuda("Multiple setup iterations can only be used with test vector setup"); + + if (setup_type == QUDA_SETUP_NULL_VECTOR_INVERSE_ITERATIONS) { + // Initializing to random vectors + if (!refresh) { + initializeRandomVectors(param.B); + } + + generateInverseIterations(param.B, refresh ? param.mg_global.setup_maxiter_refresh[param.level] : param.mg_global.setup_maxiter[param.level]); + } else { + // Other types support an inverse iterations "polish" + + if (setup_type == QUDA_SETUP_NULL_VECTOR_FREE_FIELD) { + // Generate free field near-null vectors. Consistency checks on the number of + // generated vectors are done within this function. + generateFreeVectors(param.B); + } else if (setup_type == QUDA_SETUP_NULL_VECTOR_RESTRICT_FINE) { + // Restrict near-null vectors from the finer level, generating extra if coarse Nvec > fine Nvec + generateRestrictedVectors(refresh); + } else if (setup_type == QUDA_SETUP_NULL_VECTOR_EIGENVECTORS) { + + // Run the eigensolver + generateEigenvectors(param.B); + } else if (setup_type == QUDA_SETUP_NULL_VECTOR_CHEBYSHEV_FILTER) { + + // Initializing to random vectors + if (!refresh) { + int num_initialize = param.mg_global.filter_startup_vectors[param.level]; + if (num_initialize > (int)param.B.size()) + errorQuda("Chebyshev filter num_initialize %d is greater than vectors to setup %d", num_initialize, (int)param.B.size()); + initializeRandomVectors(std::vector(param.B.begin(), param.B.begin() + num_initialize)); + } + + // Chebyshev filter + generateChebyshevFilter(param.B); + + } else if (setup_type == QUDA_SETUP_NULL_VECTOR_TEST_VECTORS) { + errorQuda("Test vector setup is currently not enabled."); + + for (int si = 0; si < param.mg_global.num_setup_iter[param.level]; si++) { + logQuda(QUDA_VERBOSE, "Running vectors setup on level %d iter %d of %d\n", param.level, si + 1, + param.mg_global.num_setup_iter[param.level]); + + // can we do test vectors + chebyshev filter setup? + generateInverseIterations(param.B, refresh ? param.mg_global.setup_maxiter_refresh[param.level] : param.mg_global.setup_maxiter[param.level]); + + // recurse when doing test setups, currently buggy + if (param.mg_global.setup_inv_type[param.level] == QUDA_MG_INVERTER) { + if (transfer) { + resetTransfer = true; + reset(); + if ( param.level < param.Nlevel - 2) { + coarse->constructNearNulls(); + } + } else { + reset(); + } + } + + } + + } else { + errorQuda("Invalid setup type %d", setup_type); + } + + if (param.mg_global.setup_maxiter_inverse_iterations_refinement[param.level] > 0) { + generateInverseIterations(param.B, param.mg_global.setup_maxiter_inverse_iterations_refinement[param.level]); + } + } + } - if (param.level < param.Nlevel - 2) coarse->dumpNullVectors(); + + popLevel(); } - void MG::generateNullVectors(std::vector &B, bool refresh) + void MG::generateInverseIterations(std::vector &B, int iterations) { pushLevel(param.level); SolverParam solverParam(param); // Set solver field parameters: // set null-space generation options - need to expose these - solverParam.maxiter - = refresh ? param.mg_global.setup_maxiter_refresh[param.level] : param.mg_global.setup_maxiter[param.level]; + solverParam.maxiter = (iterations == 0) ? param.mg_global.setup_maxiter_refresh[param.level] : iterations; solverParam.tol = param.mg_global.setup_tol[param.level]; solverParam.use_init_guess = QUDA_USE_INIT_GUESS_YES; solverParam.delta = 1e-1; @@ -1448,8 +1544,8 @@ namespace quda solverParam.precision_precondition = solverParam.precision; } - solverParam.residual_type = static_cast(QUDA_L2_RELATIVE_RESIDUAL); - solverParam.compute_null_vector = QUDA_COMPUTE_NULL_VECTOR_YES; + solverParam.residual_type = QUDA_L2_RELATIVE_RESIDUAL; + solverParam.compute_null_vector = true; ColorSpinorParam csParam(*B[0]); // Create spinor field parameters: csParam.setPrecision(r->Precision(), r->Precision(), true); // ensure native ordering csParam.location = QUDA_CUDA_FIELD_LOCATION; // hard code to GPU location for null-space generation for now @@ -1465,7 +1561,7 @@ namespace quda bool schwarz_reset = solverParam.inv_type != QUDA_MG_INVERTER && param.mg_global.smoother_schwarz_type[param.level] != QUDA_INVALID_SCHWARZ; if (schwarz_reset) { - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Disabling Schwarz for null-space finding"); + logQuda(QUDA_VERBOSE, "Disabling Schwarz for null-space finding"); int commDim[QUDA_MAX_DIM]; for (int i = 0; i < QUDA_MAX_DIM; i++) commDim[i] = 1; diracSmootherSloppy->setCommDim(commDim); @@ -1505,82 +1601,38 @@ namespace quda *param.matSmoothSloppy, profile); } - for (int si = 0; si < param.mg_global.num_setup_iter[param.level]; si++) { - if (getVerbosity() >= QUDA_VERBOSE) - printfQuda("Running vectors setup on level %d iter %d of %d\n", param.level, si + 1, - param.mg_global.num_setup_iter[param.level]); - - // global orthonormalization of the initial null-space vectors - if(param.mg_global.pre_orthonormalize) { - for(int i=0; i<(int)B.size(); i++) { - for (int j=0; j - caxpy(-alpha, *B[j], *B[i]); // i-j - } - double nrm2 = norm2(*B[i]); - if (nrm2 > 1e-16) ax(1.0 /sqrt(nrm2), *B[i]);// i/ - else errorQuda("\nCannot normalize %u vector\n", i); - } - } - - // launch solver for each source - for (int i=0; i<(int)B.size(); i++) { - if (param.mg_global.setup_type == QUDA_TEST_VECTOR_SETUP) { // DDalphaAMG test vector idea - b = *B[i]; // inverting against the vector - zero(x); // with zero initial guess - } else { - x = *B[i]; - zero(b); - } - - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Initial guess = %g\n", norm2(x)); - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Initial rhs = %g\n", norm2(b)); - - ColorSpinorField *out=nullptr, *in=nullptr; - diracSmoother->prepare(in, out, x, b, QUDA_MAT_SOLUTION); - (*solve)(*out, *in); - diracSmoother->reconstruct(x, b, QUDA_MAT_SOLUTION); + // global orthonormalization of the initial null-space vectors + if(param.mg_global.pre_orthonormalize) { + orthonormalizeVectors(B); + } - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Solution = %g\n", norm2(x)); - *B[i] = x; + // launch solver for each source + for (int i=0; i<(int)B.size(); i++) { + if (param.mg_global.setup_type[param.level] == QUDA_SETUP_NULL_VECTOR_TEST_VECTORS) { // DDalphaAMG test vector idea + b = *B[i]; // inverting against the vector + zero(x); // with zero initial guess + } else { + x = *B[i]; + zero(b); } - // global orthonormalization of the generated null-space vectors - if (param.mg_global.post_orthonormalize) { - for(int i=0; i<(int)B.size(); i++) { - for (int j=0; j - caxpy(-alpha, *B[j], *B[i]); // i-j - } - double nrm2 = norm2(*B[i]); - if (sqrt(nrm2) > 1e-16) ax(1.0/sqrt(nrm2), *B[i]);// i/ - else errorQuda("\nCannot normalize %u vector (nrm=%e)\n", i, sqrt(nrm2)); - } + if (getVerbosity() >= QUDA_VERBOSE) { + printfQuda("Initial guess = %g\n", norm2(x)); + printfQuda("Initial rhs = %g\n", norm2(b)); } - if (solverParam.inv_type == QUDA_MG_INVERTER) { - - if (transfer) { - resetTransfer = true; - reset(); - if ( param.level < param.Nlevel-2 ) { - if ( param.mg_global.generate_all_levels == QUDA_BOOLEAN_TRUE ) { - coarse->generateNullVectors(*B_coarse, refresh); - } else { - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Restricting null space vectors\n"); - for (int i=0; iR(*(*B_coarse)[i], *(param.B[i])); - } - // rebuild the transfer operator in the coarse level - coarse->resetTransfer = true; - coarse->reset(); - } - } - } else { - reset(); - } - } + ColorSpinorField *out=nullptr, *in=nullptr; + diracSmoother->prepare(in, out, x, b, QUDA_MAT_SOLUTION); + (*solve)(*out, *in); + diracSmoother->reconstruct(x, b, QUDA_MAT_SOLUTION); + + if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Solution = %g\n", norm2(x)); + *B[i] = x; + } + + // global orthonormalization of the generated null-space vectors + if (param.mg_global.post_orthonormalize) { + orthonormalizeVectors(B); } delete solve; @@ -1591,14 +1643,118 @@ namespace quda // reenable Schwarz if (schwarz_reset) { - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Reenabling Schwarz for null-space finding"); + logQuda(QUDA_VERBOSE, "Reenabling Schwarz for null-space finding"); int commDim[QUDA_MAX_DIM]; for (int i=0; isetCommDim(commDim); } - if (param.mg_global.vec_store[param.level] == QUDA_BOOLEAN_TRUE) { // conditional store of null vectors - saveVectors(B); + popLevel(); + } + + void MG::generateChebyshevFilter(std::vector &B) + { + pushLevel(param.level); + + // TBD: implement a refresh + + // use the right space; diracResidual is guaranteed to be full-parity + DiracMdagM dirac_mdm(diracResidual); + + // temporaries for Dirac applications + ColorSpinorField tmp1(*B[0]); + ColorSpinorField tmp2(*B[0]); + + // temporaries needed for Chebyshev filter + ColorSpinorField pk(*B[0]); + ColorSpinorField pkm1(*B[0]); + ColorSpinorField pkm2(*B[0]); + ColorSpinorField Apkm1(*B[0]); + + const int num_starting_vectors = param.mg_global.filter_startup_vectors[param.level]; + const int filter_iterations = param.mg_global.filter_startup_iterations[param.level]; + const int filter_rescale_freq = param.mg_global.filter_startup_rescale_frequency[param.level]; + const int iterations_for_next = param.mg_global.filter_iterations_between_vectors[param.level]; + const double lambda_min = param.mg_global.filter_lambda_min[param.level]; + double lambda_max = param.mg_global.filter_lambda_max[param.level]; + if (lambda_max < lambda_min) { + // approximate lambda_max + PolynomialBasisParams basis_params; + basis_params.basis = QUDA_POWER_BASIS; + basis_params.n_order = 100; + basis_params.normalize_freq = 10; + basis_params.tmp_vectors.emplace_back(std::ref(pk)); + basis_params.tmp_vectors.emplace_back(std::ref(pkm1)); + lambda_max = 1.1 * performPowerIterations(dirac_mdm, *B[0], basis_params, tmp1, tmp2); + } + + logQuda(QUDA_VERBOSE, "Nums starting %d Filter iter %d Filter rescale %d Filter next %d Lambda min %e lambda max %e\n", + num_starting_vectors, filter_iterations, filter_rescale_freq, iterations_for_next, lambda_min, lambda_max); + + // create filter basis info + PolynomialBasisParams filter_basis_params; + filter_basis_params.basis = QUDA_CHEBYSHEV_BASIS; + filter_basis_params.n_order = filter_iterations; + filter_basis_params.normalize_freq = filter_rescale_freq; + filter_basis_params.tmp_vectors.emplace_back(std::ref(pk)); + filter_basis_params.tmp_vectors.emplace_back(std::ref(pkm1)); + filter_basis_params.tmp_vectors.emplace_back(std::ref(pkm2)); + filter_basis_params.tmp_vectors.emplace_back(std::ref(Apkm1)); + filter_basis_params.m_map = PolynomialBasisParams::compute_m_map(lambda_min, lambda_max); + filter_basis_params.b_map = PolynomialBasisParams::compute_b_map(lambda_min, lambda_max); + + // create basis info for polynomial that generates more near-nulls + // note that n_order is intentionally left empty, since that can + // vary if the number of starting vectors does not divide evenly + // into the total number of near-nulls + PolynomialBasisParams generation_basis_params; + generation_basis_params.basis = QUDA_CHEBYSHEV_BASIS; + generation_basis_params.normalize_freq = 0; + generation_basis_params.tmp_vectors.emplace_back(std::ref(pk)); + generation_basis_params.tmp_vectors.emplace_back(std::ref(pkm1)); + generation_basis_params.tmp_vectors.emplace_back(std::ref(pkm2)); + generation_basis_params.tmp_vectors.emplace_back(std::ref(Apkm1)); + generation_basis_params.m_map = PolynomialBasisParams::compute_m_map(0., lambda_max); + generation_basis_params.b_map = PolynomialBasisParams::compute_b_map(0., lambda_max); + + // we create batches of near-nulls from `B.size() / vectors_per_set` starting + // random vectors + int num_vec = static_cast(B.size()); + + // orthonormalize + if (param.mg_global.pre_orthonormalize) { + orthonormalizeVectors(B, num_starting_vectors); + } + + for (int s = 0; s < num_starting_vectors; s++) { + + // Apply the initial Chebyshev filter + applyMatrixPolynomial(dirac_mdm, *B[s], *B[s], filter_basis_params, tmp1, tmp2); + + // Now we generate further vectors by another Chebyshev filter from 0 -> lambda_max + // Populate the array of outputs + std::vector output_vecs; + for (int i = s + num_starting_vectors; i < num_vec; i += num_starting_vectors) + output_vecs.emplace_back(std::ref(*B[i])); + + // the number we need to save is the size of output_vecs; the order of the polynomial is + // iterations_for_next times the size + generation_basis_params.n_order = output_vecs.size() * iterations_for_next; + + // Generate additional near-null vectors + applyMatrixPolynomial(dirac_mdm, output_vecs, *B[s], generation_basis_params, iterations_for_next, tmp1, tmp2); + + } + + // print norms + logQuda(QUDA_VERBOSE, "Norms of Chebyshev filtered near-null vectors:\n"); + for (int i = 0; i < num_vec; i++) { + logQuda(QUDA_VERBOSE, "Vector %d = %e\n", i, sqrt(blas::norm2(*B[i]))); + } + + // global orthonormalization of the generated null-space vectors + if (param.mg_global.post_orthonormalize) { + orthonormalizeVectors(B); } popLevel(); @@ -1606,7 +1762,7 @@ namespace quda // generate a full span of free vectors. // FIXME: Assumes fine level is SU(3). - void MG::buildFreeVectors(std::vector &B) + void MG::generateFreeVectors(std::vector& B) { pushLevel(param.level); const int Nvec = B.size(); @@ -1616,6 +1772,9 @@ namespace quda const int Ncolor = B[0]->Ncolor(); const int Nspin = B[0]->Nspin(); + // Zero the null vectors + for (auto b : B) zero(*b); + if (Ncolor == 3) // fine level { if (Nspin == 4) // Wilson or Twisted Mass (singlet) @@ -1623,11 +1782,7 @@ namespace quda // There needs to be 6 null vectors -> 12 after chirality. if (Nvec != 6) errorQuda("\nError in MG::buildFreeVectors: Wilson-type fermions require Nvec = 6"); - if (getVerbosity() >= QUDA_VERBOSE) - printfQuda("Building %d free field vectors for Wilson-type fermions\n", Nvec); - - // Zero the null vectors. - for (int i = 0; i < Nvec; i++) zero(*B[i]); + logQuda(QUDA_VERBOSE, "Building %d free field vectors for Wilson-type fermions\n", Nvec); // Create a temporary vector. ColorSpinorParam csParam(*B[0]); @@ -1650,11 +1805,7 @@ namespace quda // There needs to be 24 null vectors -> 48 after chirality. if (Nvec != 24) errorQuda("\nError in MG::buildFreeVectors: Staggered-type fermions require Nvec = 24\n"); - if (getVerbosity() >= QUDA_VERBOSE) - printfQuda("Building %d free field vectors for Staggered-type fermions\n", Nvec); - - // Zero the null vectors. - for (int i = 0; i < Nvec; i++) zero(*B[i]); + logQuda(QUDA_VERBOSE, "Building %d free field vectors for Staggered-type fermions\n", Nvec); // Create a temporary vector. ColorSpinorParam csParam(*B[0]); @@ -1663,55 +1814,15 @@ namespace quda // Build free null vectors. for (int c = 0; c < B[0]->Ncolor(); c++) { - // Need to pair an even+odd corner together - // since they'll get split up. - // 0000, 0001 - tmp.Source(QUDA_CORNER_SOURCE, 1, 0x0, c); - xpy(tmp, *B[8 * c + 0]); - tmp.Source(QUDA_CORNER_SOURCE, 1, 0x1, c); - xpy(tmp, *B[8 * c + 0]); - - // 0010, 0011 - tmp.Source(QUDA_CORNER_SOURCE, 1, 0x2, c); - xpy(tmp, *B[8 * c + 1]); - tmp.Source(QUDA_CORNER_SOURCE, 1, 0x3, c); - xpy(tmp, *B[8 * c + 1]); - - // 0100, 0101 - tmp.Source(QUDA_CORNER_SOURCE, 1, 0x4, c); - xpy(tmp, *B[8 * c + 2]); - tmp.Source(QUDA_CORNER_SOURCE, 1, 0x5, c); - xpy(tmp, *B[8 * c + 2]); - - // 0110, 0111 - tmp.Source(QUDA_CORNER_SOURCE, 1, 0x6, c); - xpy(tmp, *B[8 * c + 3]); - tmp.Source(QUDA_CORNER_SOURCE, 1, 0x7, c); - xpy(tmp, *B[8 * c + 3]); - - // 1000, 1001 - tmp.Source(QUDA_CORNER_SOURCE, 1, 0x8, c); - xpy(tmp, *B[8 * c + 4]); - tmp.Source(QUDA_CORNER_SOURCE, 1, 0x9, c); - xpy(tmp, *B[8 * c + 4]); - - // 1010, 1011 - tmp.Source(QUDA_CORNER_SOURCE, 1, 0xA, c); - xpy(tmp, *B[8 * c + 5]); - tmp.Source(QUDA_CORNER_SOURCE, 1, 0xB, c); - xpy(tmp, *B[8 * c + 5]); - - // 1100, 1101 - tmp.Source(QUDA_CORNER_SOURCE, 1, 0xC, c); - xpy(tmp, *B[8 * c + 6]); - tmp.Source(QUDA_CORNER_SOURCE, 1, 0xD, c); - xpy(tmp, *B[8 * c + 6]); - - // 1110, 1111 - tmp.Source(QUDA_CORNER_SOURCE, 1, 0xE, c); - xpy(tmp, *B[8 * c + 7]); - tmp.Source(QUDA_CORNER_SOURCE, 1, 0xF, c); - xpy(tmp, *B[8 * c + 7]); + // Need to pair an even+odd corner together since they'll get split up + // during chiral doubling. Vector 0 is `0000`,`0001`; + // vector 1 is `0010`,`0011`, etc. + for (int corner = 0; corner < 8; corner++) { + tmp.Source(QUDA_CORNER_SOURCE, 1, 2 * corner, c); + xpy(tmp, *B[8 * c + corner]); + tmp.Source(QUDA_CORNER_SOURCE, 1, 2 * corner + 1, c); + xpy(tmp, *B[8 * c + corner]); + } } } else { @@ -1722,10 +1833,7 @@ namespace quda // There needs to be Ncolor null vectors. if (Nvec != Ncolor) errorQuda("\nError in MG::buildFreeVectors: Coarse fermions require Nvec = Ncolor"); - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Building %d free field vectors for Coarse fermions\n", Ncolor); - - // Zero the null vectors. - for (int i = 0; i < Nvec; i++) zero(*B[i]); + logQuda(QUDA_VERBOSE, "Building %d free field vectors for Coarse fermions\n", Ncolor); // Create a temporary vector. ColorSpinorParam csParam(*B[0]); @@ -1743,10 +1851,7 @@ namespace quda // There needs to be Ncolor null vectors. if (Nvec != Ncolor) errorQuda("\nError in MG::buildFreeVectors: Coarse fermions require Nvec = Ncolor"); - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Building %d free field vectors for Coarse fermions\n", Ncolor); - - // Zero the null vectors. - for (int i = 0; i < Nvec; i++) zero(*B[i]); + logQuda(QUDA_VERBOSE, "Building %d free field vectors for Coarse fermions\n", Ncolor); // Create a temporary vector. ColorSpinorParam csParam(*B[0]); @@ -1765,81 +1870,161 @@ namespace quda // global orthonormalization of the generated null-space vectors if(param.mg_global.post_orthonormalize) { - for(int i=0; i<(int)B.size(); i++) { - double nrm2 = norm2(*B[i]); - if (nrm2 > 1e-16) ax(1.0 /sqrt(nrm2), *B[i]);// i/ - else errorQuda("\nCannot normalize %u vector\n", i); - } + orthonormalizeVectors(B); } - if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Done building free vectors\n"); + logQuda(QUDA_VERBOSE, "Done building free vectors\n"); popLevel(); } - void MG::generateEigenVectors() + void MG::generateEigenvectors(std::vector& B, bool post_restrict) { pushLevel(param.level); - // Extract eigensolver params - int n_conv = param.mg_global.eig_param[param.level]->n_conv; - bool dagger = param.mg_global.eig_param[param.level]->use_dagger; - bool normop = param.mg_global.eig_param[param.level]->use_norm_op; + if (param.mg_global.eig_param[param.level] == nullptr) + errorQuda("eig_param for level %d has not been populated", param.level); + + // Extract eigensolver params; we create a copy by value because we may override + // some values when we restrict first + auto eig_param = *param.mg_global.eig_param[param.level]; + + // We need to compute fewer eigenvectors if we've restricted some fine + // near-nulls first. + if (post_restrict) { + eig_param.n_conv = static_cast(B.size()); + eig_param.n_ev_deflate = static_cast(B.size()); + } + + int n_conv = eig_param.n_conv; + bool dagger = eig_param.use_dagger; + bool normop = eig_param.use_norm_op; // Dummy array to keep the eigensolver happy. std::vector evals(n_conv, 0.0); - std::vector B_evecs; - ColorSpinorParam csParam(*param.B[0]); - csParam.gammaBasis = QUDA_UKQCD_GAMMA_BASIS; + ColorSpinorParam csParam(*B[0]); + if (csParam.siteSubset == QUDA_PARITY_SITE_SUBSET) { + csParam.siteSubset = QUDA_FULL_SITE_SUBSET; + csParam.x[0] *= 2; + } + if (B[0]->Nspin() == 4) csParam.gammaBasis = QUDA_UKQCD_GAMMA_BASIS; csParam.create = QUDA_ZERO_FIELD_CREATE; // This is the vector precision used by matResidual csParam.setPrecision(param.mg_global.invert_param->cuda_prec_sloppy, QUDA_INVALID_PRECISION, true); for (int i = 0; i < n_conv; i++) B_evecs.push_back(new ColorSpinorField(csParam)); - // before entering the eigen solver, let's free the B vectors to save some memory - ColorSpinorParam bParam(*param.B[0]); - for (int i = 0; i < (int)param.B.size(); i++) delete param.B[i]; - EigenSolver *eig_solve; if (!normop && !dagger) { DiracM *mat = new DiracM(*diracResidual); - eig_solve = EigenSolver::create(param.mg_global.eig_param[param.level], *mat, profile); + eig_solve = EigenSolver::create(&eig_param, *mat, profile); (*eig_solve)(B_evecs, evals); delete eig_solve; delete mat; } else if (!normop && dagger) { DiracMdag *mat = new DiracMdag(*diracResidual); - eig_solve = EigenSolver::create(param.mg_global.eig_param[param.level], *mat, profile); + eig_solve = EigenSolver::create(&eig_param, *mat, profile); (*eig_solve)(B_evecs, evals); delete eig_solve; delete mat; } else if (normop && !dagger) { DiracMdagM *mat = new DiracMdagM(*diracResidual); - eig_solve = EigenSolver::create(param.mg_global.eig_param[param.level], *mat, profile); + eig_solve = EigenSolver::create(&eig_param, *mat, profile); (*eig_solve)(B_evecs, evals); delete eig_solve; delete mat; } else if (normop && dagger) { DiracMMdag *mat = new DiracMMdag(*diracResidual); - eig_solve = EigenSolver::create(param.mg_global.eig_param[param.level], *mat, profile); + eig_solve = EigenSolver::create(&eig_param, *mat, profile); (*eig_solve)(B_evecs, evals); delete eig_solve; delete mat; } - // now reallocate the B vectors copy in e-vectors - for (int i = 0; i < (int)param.B.size(); i++) { - param.B[i] = new ColorSpinorField(bParam); - *param.B[i] = *B_evecs[i]; + // copy e-vectors back in + for (int i = 0; i < (int)B.size(); i++) { + if (B[0]->SiteSubset() == QUDA_PARITY_SITE_SUBSET) + *B[i] = B_evecs[i]->Even(); + else + *B[i] = *B_evecs[i]; } // Local clean-up for (auto b : B_evecs) { delete b; } - // only save if outfile is defined - if (strcmp(param.mg_global.vec_outfile[param.level], "") != 0) { saveVectors(param.B); } + popLevel(); + } + + void MG::generateRestrictedVectors(bool refresh) + { + pushLevel(param.level); + + if (is_fine_grid()) errorQuda("There are no fine near-null vectors to restrict on level 0"); + if (param.Nvec != param.fine->param.Nvec) + logQuda(QUDA_VERBOSE, "The number of near-null vectors on the fine level (%d) and coarse level (%d) do not match, restricting as many as possible\n", param.fine->param.Nvec, param.Nvec); + + logQuda(QUDA_VERBOSE, "Restricting null space vectors\n"); + + // B vectors can be in half precession on the fine level, need to convert to single + ColorSpinorField Btmp; + bool need_tmp_single = param.fine->param.B[0]->Precision() < QUDA_SINGLE_PRECISION; + if (need_tmp_single) { + ColorSpinorParam b_tmp_param(*param.fine->param.B[0]); + b_tmp_param.create = QUDA_NULL_FIELD_CREATE; + b_tmp_param.setPrecision(QUDA_SINGLE_PRECISION, QUDA_INVALID_PRECISION, + b_tmp_param.location == QUDA_CUDA_FIELD_LOCATION ? true : false); + Btmp = ColorSpinorField(b_tmp_param); + } + + for (int i = 0; i < param.fine->param.Nvec; i++) { + zero(*(param.B[i])); + if (need_tmp_single) Btmp = *(param.fine->param.B[i]); + ColorSpinorField& Bfine = need_tmp_single ? Btmp : *(param.fine->param.B[i]); + param.fine->transfer->R(*(param.B[i]), Bfine); + } + + // generate more if need be + if (param.Nvec != param.fine->param.Nvec) { + auto setup_remaining_type = param.mg_global.setup_restrict_remaining_type[param.level]; + + auto B_remaining = std::vector(param.B.begin() + param.fine->param.Nvec, param.B.end()); + + if (setup_remaining_type == QUDA_SETUP_NULL_VECTOR_INVERSE_ITERATIONS) { + if (!refresh) { + initializeRandomVectors(B_remaining); + } + if (param.mg_global.pre_orthonormalize) { + orthonormalizeVectors(param.B); + } + + generateInverseIterations(B_remaining, refresh ? param.mg_global.setup_maxiter_refresh[param.level] : param.mg_global.setup_maxiter[param.level]); + } else if (setup_remaining_type == QUDA_SETUP_NULL_VECTOR_CHEBYSHEV_FILTER) { + int num_initialize = param.mg_global.filter_startup_vectors[param.level]; + if (!refresh) { + if (num_initialize > (int)B_remaining.size()) + errorQuda("Chebyshev filter num_initialize %d is greater than vectors to setup %d", num_initialize, (int)B_remaining.size()); + + initializeRandomVectors(std::vector(B_remaining.begin(), B_remaining.begin() + num_initialize)); + } + + if (param.mg_global.pre_orthonormalize) { + orthonormalizeVectors(std::vector(param.B.begin(), param.B.begin() + param.fine->param.Nvec + num_initialize)); + } + + generateChebyshevFilter(B_remaining); + } else if (setup_remaining_type == QUDA_SETUP_NULL_VECTOR_EIGENVECTORS) { + // nothing to initialize, though there may be scope to reuse with Block Lanczos + // once it's performant + generateEigenvectors(B_remaining, true); + } else { + errorQuda("Unsupported remaining near-null vector type %d", setup_remaining_type); + } + } + + if (param.mg_global.post_orthonormalize) { + orthonormalizeVectors(param.B); + } popLevel(); } diff --git a/lib/polynomial.hpp b/lib/polynomial.hpp new file mode 100644 index 0000000000..aff3386096 --- /dev/null +++ b/lib/polynomial.hpp @@ -0,0 +1,260 @@ + +#include +#include +#include + +namespace quda +{ + + struct PolynomialBasisParams { + + /** What basis polynomial are we using */ + QudaPolynomialBasis basis; + + /** What order polynomial are we generating? */ + int n_order; + + /** How often are we re-normalizing the vector to avoid overflow + Set to 0 to never normalize */ + int normalize_freq; + + /** Chebyshev basis: slope of mapping onto [-1,1] */ + double m_map; + + /** Chebyshev basis: intercept of mapping onto [-1,1] */ + double b_map; + + /** Vector of temporary vectors; need 2 for power iterations, 4 for chebyshev basis */ + std::vector tmp_vectors; + + PolynomialBasisParams() : + basis(QUDA_INVALID_BASIS), + n_order(0), + normalize_freq(0), + m_map(0.0), + b_map(0.0), + tmp_vectors() { } + + static double compute_m_map(double lambda_min, double lambda_max) { return 2. / (lambda_max - lambda_min); } + static double compute_b_map(double lambda_min, double lambda_max) { return - (lambda_max + lambda_min) / (lambda_max - lambda_min); } + + static void check_params(const PolynomialBasisParams& params, bool skip_temporaries_check = false) { + if (params.n_order < 0) errorQuda("Invalid polynomial order %d", params.n_order); + if (params.normalize_freq < 0) errorQuda("Invalid rescale frequency %d", params.normalize_freq); + + // the temporary check does not need to be done for CA basis generation + if (!skip_temporaries_check) { + switch (params.basis) { + case QUDA_POWER_BASIS: + if (params.tmp_vectors.size() < 2) errorQuda("Invalid temporary vector count %lu for power basis, expected 2", params.tmp_vectors.size()); + break; + case QUDA_CHEBYSHEV_BASIS: + if (params.m_map < 0) errorQuda("Invalid m_map %e, implies lambda_min >= lambda_max", params.m_map); + if (params.tmp_vectors.size() < 4) errorQuda("Invalid temporary vector count %lu for Chebyshev basis, expected 4", params.tmp_vectors.size()); + break; + default: errorQuda("Polynomial basis is unspecified"); + } + } + } + }; + + /** + @brief Apply a polynomial basis to a starting vector, optionally saving with some frequency + @param[in] diracm Dirac matrix used for the polynomial + @param[out] output_vecs Output vectors, which may be more than one if the save frquency is non-zero + @param[in] start_vec Starting vector for polynomial application + @param[in] poly_params Parameters for the polynomial application + @param[in] save_freq Frequency with which vectors are saved to output_vecs + @param[in] args Parameter pack of ColorSpinorFields used as temporaries passed to Dirac + */ + template + void applyMatrixPolynomial(const DiracMatrix &diracm, std::vector &output_vecs, + const ColorSpinorField &start_vec, const PolynomialBasisParams &poly_params, int save_freq, + Args &&...args) + { + PolynomialBasisParams::check_params(poly_params); + // if the save_frequency is 0, make sure output_vecs is of length 1 + if (save_freq < 0) + errorQuda("Invalid save frequency %lu", output_vecs.size()); + else if (save_freq == 0 && output_vecs.size() != 1) + errorQuda("Invalid vector length %lu", output_vecs.size()); + else if (save_freq > 0 && poly_params.n_order % save_freq != 0) + errorQuda("Saving frequency %d does not evenly divide into polynomial order %d", save_freq, poly_params.n_order); + else if (save_freq > 0 && static_cast(output_vecs.size()) != poly_params.n_order / save_freq) + errorQuda("Invalid vector length %lu, expected %d", output_vecs.size(), poly_params.n_order / save_freq); + + int save_count = 0; + + if (poly_params.basis == QUDA_POWER_BASIS) { + ColorSpinorField &tempvec1 = poly_params.tmp_vectors[0]; + ColorSpinorField &tempvec2 = poly_params.tmp_vectors[1]; + blas::copy(tempvec1, start_vec); // no-op if fieds alias + + for (int i = 1; i <= poly_params.n_order; i++) { + diracm(tempvec2, tempvec1, args...); + if (save_freq > 0 && i % save_freq == 0) + blas::copy(output_vecs[save_count++], tempvec2); + if (poly_params.normalize_freq > 0 && i % poly_params.normalize_freq == 0) { + double tmp_nrm = sqrt(blas::norm2(tempvec2)); + logQuda(QUDA_VERBOSE, "Triggered rescale during matrix polynomial application; norm at rescale is %e\n", tmp_nrm); + blas::ax(1.0 / tmp_nrm, tempvec2); + } + std::swap(tempvec1, tempvec2); + } + + // if save_freq != 0, the last vector has already been saved into output_vecs + if (save_freq == 0) blas::copy(output_vecs[0], tempvec1); + } else if (poly_params.basis == QUDA_CHEBYSHEV_BASIS) { + + ColorSpinorField &pk = poly_params.tmp_vectors[0]; + ColorSpinorField &pkm1 = poly_params.tmp_vectors[1]; + ColorSpinorField &pkm2 = poly_params.tmp_vectors[2]; + ColorSpinorField &Apkm1 = poly_params.tmp_vectors[3]; + + int save_count = 0; + + // P0 + blas::copy(pk, start_vec); + + if (poly_params.n_order > 0) { + // P1 = m Ap_0 + b p_0 + std::swap(pkm1, pk); // p_k -> p_{k - 1} + diracm(Apkm1, pkm1, args...); + blas::axpbyz(poly_params.m_map, Apkm1, poly_params.b_map, pkm1, pk); + + if (save_freq == 1) blas::copy(output_vecs[save_count++], pk); + if (poly_params.normalize_freq == 1) { + double tmp_nrm = sqrt(blas::norm2(pk)); + logQuda(QUDA_VERBOSE, "Triggered rescale during matrix polynomial application; norm at rescale is %e\n", tmp_nrm); + double tmp_inv_nrm = 1. / tmp_nrm; + blas::ax(tmp_inv_nrm, pk); + blas::ax(tmp_inv_nrm, pkm1); + blas::ax(tmp_inv_nrm, Apkm1); + } + + if (poly_params.n_order > 1) { + // Enter recursion relation + for (int k = 2; k <= poly_params.n_order; k++) { + std::swap(pkm2, pkm1); // p_{k - 1} -> p_{k-2} + std::swap(pkm1, pk); // p_k -> p_{k-1} + diracm(Apkm1, pkm1, args...); // compute A p_{k-1} + blas::axpbypczw(2. * poly_params.m_map, Apkm1, 2. * poly_params.b_map, pkm1, -1., pkm2, pk); + + if (save_freq > 0 && k % save_freq == 0) blas::copy(output_vecs[save_count++], pk); + // heuristic rescale to keep norms in check... + if (poly_params.normalize_freq > 0 && k % poly_params.normalize_freq == 0) { + double tmp_nrm = sqrt(blas::norm2(pk)); + logQuda(QUDA_VERBOSE, "Triggered rescale during matrix polynomial application; norm at rescale is %e\n", tmp_nrm); + double tmp_inv_nrm = 1. / tmp_nrm; + blas::ax(tmp_inv_nrm, pk); + blas::ax(tmp_inv_nrm, pkm1); + blas::ax(tmp_inv_nrm, pkm2); + blas::ax(tmp_inv_nrm, Apkm1); + } + } + } + } + + // if save_freq != 0, the last vector has already been saved into output_vecs + if (save_freq == 0) blas::copy(output_vecs[0], pk); + } else { + errorQuda("Invalid basis %d", poly_params.basis); + } + } + + /** + @brief Apply a polynomial basis to a starting vector + @param[in] diracm Dirac matrix used for the polynomial + @param[out] output_vec Output vector + @param[in] start_vec Starting vector for polynomial application + @param[in] poly_params Parameters for the polynomial application + @param[in] args Parameter pack of ColorSpinorFields used as temporaries passed to Dira + */ + template + void applyMatrixPolynomial(const DiracMatrix &diracm, ColorSpinorField &output_vec, + const ColorSpinorField &start_vec, const PolynomialBasisParams &poly_params, + Args &&...args) + { + std::vector output_vecs; + output_vecs.emplace_back(std::ref(output_vec)); + applyMatrixPolynomial(diracm, output_vecs, start_vec, poly_params, 0, args...); + } + + /** + @brief Compute power iterations on a Dirac matrix + @param[in] diracm Dirac matrix used for power iterations + @param[in] start Starting rhs for power iterations; value preserved unless it aliases temporary vectors + @param[in,out] poly_params Parameters for polynomial application, must correspond to power iterations + @param[in] args Parameter pack of ColorSpinorFields used as temporary passed to Dirac + @return Norm of final power iteration result + */ + template + double performPowerIterations(const DiracMatrix &diracm, const ColorSpinorField &start, + PolynomialBasisParams &poly_params, Args &&...args) + { + PolynomialBasisParams::check_params(poly_params); + if (poly_params.basis != QUDA_POWER_BASIS) errorQuda("Invalid basis %d", poly_params.basis); + + auto tempvec1 = poly_params.tmp_vectors[0]; + auto tempvec2 = poly_params.tmp_vectors[1]; + + applyMatrixPolynomial(diracm, tempvec1, start, poly_params, args...); + + // Get Rayleigh quotient + double tmpnrm = sqrt(blas::norm2(tempvec1)); + blas::ax(1.0 / tmpnrm, tempvec1); + diracm(tempvec2, tempvec1, args...); + double lambda_max = sqrt(blas::norm2(tempvec2)); + logQuda(QUDA_VERBOSE, "Power iterations approximate max = %e\n", lambda_max); + + return lambda_max; + } + + /** + @brief Apply a polynomial basis to a starting vector, optionally saving with some frequency + @param[in] diracm Dirac matrix used for the polynomial + @param[out] Ap dirac matrix times the Krylov basis vectors + @param[in,out] p Krylov basis vectors; assumes p[0] is in place + @param[in] poly_params Parameters for the polynomial application + @param[in] args Parameter pack of ColorSpinorFields used as temporaries passed to Dirac + */ + template + void computeCAKrylovSpace(const DiracMatrix &diracm, std::vector &Ap, + std::vector &p, PolynomialBasisParams poly_params, Args &&...args) + { + PolynomialBasisParams::check_params(poly_params, true); + + auto n_krylov = poly_params.n_order; + + // in some cases p or Ap may be larger + if (static_cast(p.size()) < n_krylov) errorQuda("Invalid p.size() %lu < n_krylov %d", p.size(), n_krylov); + if (static_cast(Ap.size()) < n_krylov) errorQuda("Invalid Ap.size() %lu < n_krylov %d", Ap.size(), n_krylov); + + if (poly_params.basis == QUDA_POWER_BASIS) { + for (int k = 0; k < n_krylov; k++) { + diracm(Ap[k], p[k], args...); + if (k < (n_krylov - 1)) blas::copy(p[k + 1], Ap[k]); // no op if fields alias, which is often the case + } + } else if (poly_params.basis == QUDA_CHEBYSHEV_BASIS) { + diracm(Ap[0], p[0], args...); + + if (n_krylov > 1) { + // p_1 = m Ap_0 + b p_0 + blas::axpbyz(poly_params.m_map, Ap[0], poly_params.b_map, p[0], p[1]); + diracm(Ap[1], p[1], args...); + + // Enter recursion relation + if (n_krylov > 2) { + // p_k = 2 m A[_{k-1} + 2 b p_{k-1} - p_{k-2} + for (int k = 2; k < n_krylov; k++) { + blas::axpbypczw(2. * poly_params.m_map, Ap[k - 1], 2. * poly_params.b_map, p[k - 1], -1., p[k - 2], p[k]); + diracm(Ap[k], p[k], args...); + } + } + } + } else { + errorQuda("Invalid basis %d", poly_params.basis); + } + } + +} // namespace quda diff --git a/lib/quda_fortran.F90 b/lib/quda_fortran.F90 index 01a792558a..f8fd660c88 100644 --- a/lib/quda_fortran.F90 +++ b/lib/quda_fortran.F90 @@ -261,7 +261,7 @@ module quda_fortran real(8) :: omega ! Basis for CA algorithms - QudaCABasis :: ca_basis + QudaPolynomialBasis :: ca_basis ! Minimum eigenvalue for Chebyshev CA basis real(8) :: ca_lambda_min @@ -270,7 +270,7 @@ module quda_fortran real(8) :: ca_lambda_max ! Basis for CA algorithms in preconditioner solvers - QudaCABasis :: ca_basis_precondition + QudaPolynomialBasis :: ca_basis_precondition ! Minimum eigenvalue for Chebyshev CA basis in preconditioner solvers real(8) :: ca_lambda_min_precondition diff --git a/lib/solver.cpp b/lib/solver.cpp index cd2f0b6b91..080b14a389 100644 --- a/lib/solver.cpp +++ b/lib/solver.cpp @@ -450,4 +450,24 @@ namespace quda { } } + /** + @brief Returns if a solver supports deflation or not + @return true if solver supports deflation, false otherwise + */ + bool is_deflatable_solver(QudaInverterType type) { + switch (type) { + case QUDA_CG_INVERTER: + case QUDA_CGNR_INVERTER: + case QUDA_CGNE_INVERTER: + case QUDA_PCG_INVERTER: + case QUDA_CA_CG_INVERTER: + case QUDA_CA_CGNR_INVERTER: + case QUDA_CA_CGNE_INVERTER: + case QUDA_GCR_INVERTER: + case QUDA_CA_GCR_INVERTER: + case QUDA_BICGSTABL_INVERTER: return true; + default: return false; + } + } + } // namespace quda diff --git a/lib/solver.hpp b/lib/solver.hpp deleted file mode 100644 index ab93451aef..0000000000 --- a/lib/solver.hpp +++ /dev/null @@ -1,96 +0,0 @@ - -#include -#include -#include -#include - -namespace quda -{ - - /** - @brief Compute power iterations on a Dirac matrix - @param[in] diracm Dirac matrix used for power iterations - @param[in] start Starting rhs for power iterations; value preserved unless it aliases tempvec1 or tempvec2 - @param[in,out] tempvec1 Temporary vector used for power iterations - @param[in,out] tempvec2 Temporary vector used for power iterations - @param[in] niter Total number of power iteration iterations - @param[in] normalize_freq Frequency with which intermediate vector gets normalized - @param[in] args Parameter pack of ColorSpinorFields used as temporary passed to Dirac - @return Norm of final power iteration result - */ - template - double Solver::performPowerIterations(const DiracMatrix &diracm, const ColorSpinorField &start, - ColorSpinorField &tempvec1, ColorSpinorField &tempvec2, int niter, - int normalize_freq, Args &&...args) - { - checkPrecision(tempvec1, tempvec2); - blas::copy(tempvec1, start); // no-op if fields alias - - // Do niter iterations, normalize every normalize_freq - for (int i = 0; i < niter; i++) { - if (normalize_freq > 0 && i % normalize_freq == 0) { - double tmpnrm = sqrt(blas::norm2(tempvec1)); - blas::ax(1.0 / tmpnrm, tempvec1); - } - diracm(tempvec2, tempvec1, args...); - if (normalize_freq > 0 && i % normalize_freq == 0) { - logQuda(QUDA_VERBOSE, "Current Rayleigh Quotient step %d is %e\n", i, sqrt(blas::norm2(tempvec2))); - } - std::swap(tempvec1, tempvec2); - } - // Get Rayleigh quotient - double tmpnrm = sqrt(blas::norm2(tempvec1)); - blas::ax(1.0 / tmpnrm, tempvec1); - diracm(tempvec2, tempvec1, args...); - double lambda_max = sqrt(blas::norm2(tempvec2)); - logQuda(QUDA_VERBOSE, "Power iterations approximate max = %e\n", lambda_max); - - return lambda_max; - } - - /** - @brief Generate a Krylov space in a given basis - @param[in] diracm Dirac matrix used to generate the Krylov space - @param[out] Ap dirac matrix times the Krylov basis vectors - @param[in,out] p Krylov basis vectors; assumes p[0] is in place - @param[in] n_krylov Size of krylov space - @param[in] basis Basis type - @param[in] m_map Slope mapping for Chebyshev basis; ignored for power basis - @param[in] b_map Intercept mapping for Chebyshev basis; ignored for power basis - @param[in] args Parameter pack of ColorSpinorFields used as temporary passed to Dirac - */ - template - void Solver::computeCAKrylovSpace(const DiracMatrix &diracm, std::vector &Ap, - std::vector &p, int n_krylov, QudaCABasis basis, double m_map, - double b_map, Args &&...args) - { - // in some cases p or Ap may be larger - if (static_cast(p.size()) < n_krylov) errorQuda("Invalid p.size() %lu < n_krylov %d", p.size(), n_krylov); - if (static_cast(Ap.size()) < n_krylov) errorQuda("Invalid Ap.size() %lu < n_krylov %d", Ap.size(), n_krylov); - - if (basis == QUDA_POWER_BASIS) { - for (int k = 0; k < n_krylov; k++) { - diracm(Ap[k], p[k], args...); - if (k < (n_krylov - 1)) blas::copy(p[k + 1], Ap[k]); // no op if fields alias, which is often the case - } - } else { // chebyshev basis - diracm(Ap[0], p[0], args...); - - if (n_krylov > 1) { - // p_1 = m Ap_0 + b p_0 - blas::axpbyz(m_map, Ap[0], b_map, p[0], p[1]); - diracm(Ap[1], p[1], args...); - - // Enter recursion relation - if (n_krylov > 2) { - // p_k = 2 m A[_{k-1} + 2 b p_{k-1} - p_{k-2} - for (int k = 2; k < n_krylov; k++) { - blas::axpbypczw(2. * m_map, Ap[k - 1], 2. * b_map, p[k - 1], -1., p[k - 2], p[k]); - diracm(Ap[k], p[k], args...); - } - } - } - } - } - -} // namespace quda diff --git a/lib/staggered_kd_build_xinv.cu b/lib/staggered_kd_build_xinv.cu index b109bc2388..3441feede0 100644 --- a/lib/staggered_kd_build_xinv.cu +++ b/lib/staggered_kd_build_xinv.cu @@ -271,6 +271,7 @@ namespace quda { std::shared_ptr AllocateAndBuildStaggeredKahlerDiracInverse(const cudaGaugeField &gauge, const double mass, const bool dagger_approximation) { GaugeFieldParam gParam(gauge); + gParam.location = QUDA_CUDA_FIELD_LOCATION; gParam.reconstruct = QUDA_RECONSTRUCT_NO; gParam.create = QUDA_NULL_FIELD_CREATE; gParam.geometry = QUDA_KDINVERSE_GEOMETRY; diff --git a/lib/transfer.cpp b/lib/transfer.cpp index dadc1298f8..9489debca2 100644 --- a/lib/transfer.cpp +++ b/lib/transfer.cpp @@ -455,7 +455,7 @@ namespace quda { if (use_gpu) { if (out.Location() == QUDA_CPU_FIELD_LOCATION) output = coarse_tmp_d; - if (in.Location() == QUDA_CPU_FIELD_LOCATION || in.GammaBasis() != V->GammaBasis()) + if (in.Location() == QUDA_CPU_FIELD_LOCATION || in.GammaBasis() != V->GammaBasis() || in.FieldOrder() != QUDA_FLOAT2_FIELD_ORDER) input = (in.SiteSubset() == QUDA_FULL_SITE_SUBSET) ? fine_tmp_d : &fine_tmp_d->Even(); if (!enable_gpu) errorQuda("not created with enable_gpu set, so cannot run on GPU"); } else { diff --git a/tests/invert_test.cpp b/tests/invert_test.cpp index c7a3e7cf6f..28150340a8 100644 --- a/tests/invert_test.cpp +++ b/tests/invert_test.cpp @@ -128,7 +128,7 @@ void init(int argc, char **argv) // Set sub structures mg_param.invert_param = &mg_inv_param; for (int i = 0; i < mg_levels; i++) { - if (mg_eig[i]) { + if (mg_eig[i] || setup_type[i] == QUDA_SETUP_NULL_VECTOR_EIGENVECTORS || setup_restrict_remaining_type[i] == QUDA_SETUP_NULL_VECTOR_EIGENVECTORS) { mg_eig_param[i] = newQudaEigParam(); setMultigridEigParam(mg_eig_param[i], i); mg_param.eig_param[i] = &mg_eig_param[i]; @@ -137,7 +137,7 @@ void init(int argc, char **argv) } } // Set MG - setMultigridParam(mg_param); + setWilsonMultigridParam(mg_param); } else { setInvertParam(inv_param); } diff --git a/tests/multigrid_evolve_test.cpp b/tests/multigrid_evolve_test.cpp index d8c72f19fc..5f52e2ff5e 100644 --- a/tests/multigrid_evolve_test.cpp +++ b/tests/multigrid_evolve_test.cpp @@ -168,7 +168,7 @@ int main(int argc, char **argv) } } // Set MG - setMultigridParam(mg_param); + setWilsonMultigridParam(mg_param); } else { setInvertParam(inv_param); } diff --git a/tests/staggered_invert_test.cpp b/tests/staggered_invert_test.cpp index 12b67cec13..33012758a7 100644 --- a/tests/staggered_invert_test.cpp +++ b/tests/staggered_invert_test.cpp @@ -185,7 +185,7 @@ int main(int argc, char **argv) mg_param.invert_param = &mg_inv_param; for (int i = 0; i < mg_levels; i++) { - if (mg_eig[i]) { + if (mg_eig[i] || setup_type[i] == QUDA_SETUP_NULL_VECTOR_EIGENVECTORS || setup_restrict_remaining_type[i] == QUDA_SETUP_NULL_VECTOR_EIGENVECTORS) { mg_eig_param[i] = newQudaEigParam(); setMultigridEigParam(mg_eig_param[i], i); mg_param.eig_param[i] = &mg_eig_param[i]; diff --git a/tests/utils/command_line_params.cpp b/tests/utils/command_line_params.cpp index 1c53b40498..fcaf647b03 100644 --- a/tests/utils/command_line_params.cpp +++ b/tests/utils/command_line_params.cpp @@ -50,10 +50,10 @@ int niter = 100; int maxiter_precondition = 10; QudaVerbosity verbosity_precondition = QUDA_SUMMARIZE; int gcrNkrylov = 8; -QudaCABasis ca_basis = QUDA_CHEBYSHEV_BASIS; +QudaPolynomialBasis ca_basis = QUDA_CHEBYSHEV_BASIS; double ca_lambda_min = 0.0; double ca_lambda_max = -1.0; -QudaCABasis ca_basis_precondition = QUDA_POWER_BASIS; +QudaPolynomialBasis ca_basis_precondition = QUDA_POWER_BASIS; double ca_lambda_min_precondition = 0.0; double ca_lambda_max_precondition = -1.0; int pipeline = 0; @@ -130,33 +130,45 @@ quda::mgarray mg_verbosity = {}; quda::mgarray setup_inv = {}; quda::mgarray coarse_solve_type = {}; quda::mgarray smoother_solve_type = {}; + +quda::mgarray setup_type = {}; +quda::mgarray setup_restrict_remaining_type = {}; + +// Parameters for inverse iterations setup quda::mgarray num_setup_iter = {}; quda::mgarray setup_tol = {}; quda::mgarray setup_maxiter = {}; quda::mgarray setup_maxiter_refresh = {}; -quda::mgarray setup_ca_basis = {}; +quda::mgarray setup_maxiter_inverse_iterations_refinement = {}; +quda::mgarray setup_ca_basis = {}; quda::mgarray setup_ca_basis_size = {}; quda::mgarray setup_ca_lambda_min = {}; quda::mgarray setup_ca_lambda_max = {}; -QudaSetupType setup_type = QUDA_NULL_VECTOR_SETUP; + +// Parameters for Chebyshev filter setup +quda::mgarray filter_startup_vectors = {}; +quda::mgarray filter_startup_iterations = {}; +quda::mgarray filter_startup_rescale_frequency = {}; +quda::mgarray filter_iterations_between_vectors = {}; +quda::mgarray filter_lambda_min = {}; +quda::mgarray filter_lambda_max = {}; + bool pre_orthonormalize = false; bool post_orthonormalize = true; double omega = 0.85; quda::mgarray coarse_solver = {}; quda::mgarray coarse_solver_tol = {}; quda::mgarray smoother_type = {}; -quda::mgarray smoother_solver_ca_basis = {}; +quda::mgarray smoother_solver_ca_basis = {}; quda::mgarray smoother_solver_ca_lambda_min = {}; quda::mgarray smoother_solver_ca_lambda_max = {}; QudaPrecision smoother_halo_prec = QUDA_INVALID_PRECISION; quda::mgarray smoother_tol = {}; quda::mgarray coarse_solver_maxiter = {}; -quda::mgarray coarse_solver_ca_basis = {}; +quda::mgarray coarse_solver_ca_basis = {}; quda::mgarray coarse_solver_ca_basis_size = {}; quda::mgarray coarse_solver_ca_lambda_min = {}; quda::mgarray coarse_solver_ca_lambda_max = {}; -bool generate_nullspace = true; -bool generate_all_levels = true; quda::mgarray mg_schwarz_type = {}; quda::mgarray mg_schwarz_cycle = {}; bool mg_evolve_thin_updates = false; @@ -278,7 +290,7 @@ bool enable_testing = false; namespace { - CLI::TransformPairs ca_basis_map {{"power", QUDA_POWER_BASIS}, {"chebyshev", QUDA_CHEBYSHEV_BASIS}}; + CLI::TransformPairs ca_basis_map {{"power", QUDA_POWER_BASIS}, {"chebyshev", QUDA_CHEBYSHEV_BASIS}}; CLI::TransformPairs contract_type_map {{"open", QUDA_CONTRACT_TYPE_OPEN}, {"dr", QUDA_CONTRACT_TYPE_DR}}; @@ -387,7 +399,12 @@ namespace {"SR", QUDA_SPECTRUM_SR_EIG}, {"LR", QUDA_SPECTRUM_LR_EIG}, {"SM", QUDA_SPECTRUM_SM_EIG}, {"LM", QUDA_SPECTRUM_LM_EIG}, {"SI", QUDA_SPECTRUM_SI_EIG}, {"LI", QUDA_SPECTRUM_LI_EIG}}; - CLI::TransformPairs setup_type_map {{"test", QUDA_TEST_VECTOR_SETUP}, {"null", QUDA_TEST_VECTOR_SETUP}}; + CLI::TransformPairs setup_type_map {{"inverse-iterations", QUDA_SETUP_NULL_VECTOR_INVERSE_ITERATIONS}, + {"chebyshev-filter", QUDA_SETUP_NULL_VECTOR_CHEBYSHEV_FILTER}, + {"eigenvectors", QUDA_SETUP_NULL_VECTOR_EIGENVECTORS}, + {"test-vectors", QUDA_SETUP_NULL_VECTOR_TEST_VECTORS}, + {"restrict-fine", QUDA_SETUP_NULL_VECTOR_RESTRICT_FINE}, + {"free-field", QUDA_SETUP_NULL_VECTOR_FREE_FIELD}}; CLI::TransformPairs extlib_map {{"eigen", QUDA_EIGEN_EXTLIB}}; @@ -838,19 +855,13 @@ void add_multigrid_option_group(std::shared_ptr quda_app) "Solve the Even-Odd preconditioned problem (default false)"); quda_app->add_mgoption(opgroup, "--mg-eig-use-poly-acc", mg_eig_use_poly_acc, CLI::Validator(), "Use Chebyshev polynomial acceleration in the eigensolver (default true)"); - opgroup->add_option( - "--mg-generate-all-levels", - generate_all_levels, "true=generate null-space on all levels, false=generate on level 0 and create other levels from that (default true)"); opgroup->add_option("--mg-evolve-thin-updates", mg_evolve_thin_updates, "Utilize thin updates for multigrid evolution tests (default false)"); - opgroup->add_option("--mg-generate-nullspace", generate_nullspace, - "Generate the null-space vector dynamically (default true, if set false and mg-load-vec isn't " - "set, creates free-field null vectors)"); opgroup->add_option("--mg-levels", mg_levels, "The number of multigrid levels to do (default 2)"); // TODO quda_app->add_mgoption(opgroup, "--mg-load-vec", mg_vec_infile, CLI::Validator(), - "Load the vectors for the multigrid_test (requires QIO)"); + "Load the vectors for the multigrid_test, overrides setup (requires QIO)"); quda_app->add_mgoption(opgroup, "--mg-save-vec", mg_vec_outfile, CLI::Validator(), "Save the generated null-space vectors from the multigrid_test (requires QIO)"); @@ -891,6 +902,13 @@ void add_multigrid_option_group(std::shared_ptr quda_app) ->transform(CLI::QUDACheckedTransformer(schwarz_type_map)); quda_app->add_mgoption(opgroup, "--mg-schwarz-cycle", mg_schwarz_cycle, CLI::PositiveNumber, "The number of Schwarz cycles to apply per smoother application (default=1)"); + + quda_app->add_mgoption(opgroup, "--mg-setup-type", setup_type, CLI::QUDACheckedTransformer(setup_type_map), + "The type of setup to use for the multigrid; ignored if --mg-load-vec is set (inverse-iterations (default), chebyshev-filter, eigenvectors, test-vectors, restrict-fine, free-field)"); + + quda_app->add_mgoption(opgroup, "--mg-setup-restrict-remaining-type", setup_restrict_remaining_type, CLI::QUDACheckedTransformer(setup_type_map), + "The type of setup to use for remaining vectors if restricting all fine null vectors (inverse-iterations (default), chebyshev-filter)"); + quda_app->add_mgoption(opgroup, "--mg-setup-ca-basis-size", setup_ca_basis_size, CLI::PositiveNumber, "The basis size to use for CA solver setup of multigrid (default 4)"); quda_app->add_mgoption(opgroup, "--mg-setup-ca-basis-type", setup_ca_basis, CLI::QUDACheckedTransformer(ca_basis_map), @@ -905,7 +923,7 @@ void add_multigrid_option_group(std::shared_ptr quda_app) quda_app->add_mgoption(opgroup, "--mg-setup-inv", setup_inv, solver_trans, "The inverter to use for the setup of multigrid (default bicgstab)"); quda_app->add_mgoption(opgroup, "--mg-setup-iters", num_setup_iter, CLI::PositiveNumber, - "The number of setup iterations to use for the multigrid (default 1)"); + "The number of setup iterations to use for the multigrid, requires test vector setup (default 1)"); quda_app->add_mgoption(opgroup, "--mg-setup-location", setup_location, CLI::QUDACheckedTransformer(field_location_map), "The location where the multigrid setup will be computed (default cuda)"); @@ -915,11 +933,26 @@ void add_multigrid_option_group(std::shared_ptr quda_app) quda_app->add_mgoption( opgroup, "--mg-setup-maxiter-refresh", setup_maxiter_refresh, CLI::Validator(), "The maximum number of solver iterations to use when refreshing the pre-existing null space vectors (default 100)"); + quda_app->add_mgoption( + opgroup, "--mg-setup-maxiter-inverse-iterations-refinement", setup_maxiter_inverse_iterations_refinement, CLI::Validator(), + "The maximum number of solver iterations to use when refining pre-existing null space vectors after a non-inverse-iteration setup (default 0, no refinement)"); quda_app->add_mgoption(opgroup, "--mg-setup-tol", setup_tol, CLI::Validator(), "The tolerance to use for the setup of multigrid (default 5e-6)"); - opgroup->add_option("--mg-setup-type", setup_type, "The type of setup to use for the multigrid (default null)") - ->transform(CLI::QUDACheckedTransformer(setup_type_map)); + quda_app->add_mgoption(opgroup, "--mg-setup-filter-startup-vectors", filter_startup_vectors, CLI::PositiveNumber, + "Number of random starting vectors for Chebyshev filter null space generation (default 1)"); + quda_app->add_mgoption(opgroup, "--mg-setup-filter-startup-iterations", filter_startup_iterations, CLI::PositiveNumber, + "Number of iterations for initial Chebyshev filter (default 1000)"); + quda_app->add_mgoption(opgroup, "--mg-setup-filter-startup-rescale-frequency", filter_startup_rescale_frequency, CLI::PositiveNumber, + "Frequency of rescales during initial filtering which helps avoid overflow (default 50)"); + quda_app->add_mgoption(opgroup, "--mg-setup-filter-iterations-between-vectors", filter_iterations_between_vectors, CLI::PositiveNumber, + "Number of iterations between null vectors generated from each starting vector (default 150)"); + quda_app->add_mgoption( + opgroup, "--mg-setup-filter-lambda-max", filter_lambda_max, CLI::Validator(), + "Conservative estimate of largest eigenvalue of operator used for Chebyshev filter setup (default is to guess with power iterations)"); + quda_app->add_mgoption( + opgroup, "--mg-setup-filter-lambda-min", filter_lambda_min, CLI::PositiveNumber, + "Lower bound of eigenvalues that are not enhanced by the initial Chebyshev filter (default 1)"); opgroup ->add_option( diff --git a/tests/utils/command_line_params.h b/tests/utils/command_line_params.h index f8d5bf8742..67307e8ea4 100644 --- a/tests/utils/command_line_params.h +++ b/tests/utils/command_line_params.h @@ -189,10 +189,10 @@ extern int niter; extern int maxiter_precondition; extern QudaVerbosity verbosity_precondition; extern int gcrNkrylov; -extern QudaCABasis ca_basis; +extern QudaPolynomialBasis ca_basis; extern double ca_lambda_min; extern double ca_lambda_max; -extern QudaCABasis ca_basis_precondition; +extern QudaPolynomialBasis ca_basis_precondition; extern double ca_lambda_min_precondition; extern double ca_lambda_max_precondition; extern int pipeline; @@ -267,33 +267,46 @@ extern quda::mgarray mg_verbosity; extern quda::mgarray setup_inv; extern quda::mgarray coarse_solve_type; extern quda::mgarray smoother_solve_type; + +extern quda::mgarray setup_type; +extern quda::mgarray setup_restrict_remaining_type; + +// Parameters for inverse iterations setup extern quda::mgarray num_setup_iter; extern quda::mgarray setup_tol; extern quda::mgarray setup_maxiter; extern quda::mgarray setup_maxiter_refresh; -extern quda::mgarray setup_ca_basis; +extern quda::mgarray setup_maxiter_inverse_iterations_refinement; +extern quda::mgarray setup_ca_basis; extern quda::mgarray setup_ca_basis_size; extern quda::mgarray setup_ca_lambda_min; extern quda::mgarray setup_ca_lambda_max; -extern QudaSetupType setup_type; + +// Parameters for Chebyshev filter setup +extern quda::mgarray filter_startup_vectors; +extern quda::mgarray filter_startup_iterations; +extern quda::mgarray filter_startup_rescale_frequency; +extern quda::mgarray filter_iterations_between_vectors; +extern quda::mgarray filter_lambda_min; +extern quda::mgarray filter_lambda_max; + + extern bool pre_orthonormalize; extern bool post_orthonormalize; extern double omega; extern quda::mgarray coarse_solver; extern quda::mgarray coarse_solver_tol; extern quda::mgarray smoother_type; -extern quda::mgarray smoother_solver_ca_basis; +extern quda::mgarray smoother_solver_ca_basis; extern quda::mgarray smoother_solver_ca_lambda_min; extern quda::mgarray smoother_solver_ca_lambda_max; extern QudaPrecision smoother_halo_prec; extern quda::mgarray smoother_tol; extern quda::mgarray coarse_solver_maxiter; -extern quda::mgarray coarse_solver_ca_basis; +extern quda::mgarray coarse_solver_ca_basis; extern quda::mgarray coarse_solver_ca_basis_size; extern quda::mgarray coarse_solver_ca_lambda_min; extern quda::mgarray coarse_solver_ca_lambda_max; -extern bool generate_nullspace; -extern bool generate_all_levels; extern quda::mgarray mg_schwarz_type; extern quda::mgarray mg_schwarz_cycle; extern bool mg_evolve_thin_updates; diff --git a/tests/utils/host_utils.cpp b/tests/utils/host_utils.cpp index ce188dbdcc..8666e42eb7 100644 --- a/tests/utils/host_utils.cpp +++ b/tests/utils/host_utils.cpp @@ -93,6 +93,7 @@ void setQudaDefaultMgTestParams() setup_tol[i] = 5e-6; setup_maxiter[i] = 500; setup_maxiter_refresh[i] = 20; + setup_maxiter_inverse_iterations_refinement[i] = 0; mu_factor[i] = 1.; coarse_solve_type[i] = QUDA_INVALID_SOLVE; smoother_solve_type[i] = QUDA_INVALID_SOLVE; @@ -128,6 +129,15 @@ void setQudaDefaultMgTestParams() mg_eig_amax[i] = -1.0; // use power iterations mg_eig_save_prec[i] = QUDA_DOUBLE_PRECISION; + setup_type[i] = QUDA_SETUP_NULL_VECTOR_INVERSE_ITERATIONS; + setup_restrict_remaining_type[i] = QUDA_SETUP_NULL_VECTOR_INVERSE_ITERATIONS; + filter_startup_vectors[i] = 1; + filter_startup_iterations[i] = 1000; + filter_startup_rescale_frequency[i] = 50; + filter_iterations_between_vectors[i] = 150; + filter_lambda_min[i] = 1.0; + filter_lambda_max[i] = -1.0; + setup_ca_basis[i] = QUDA_POWER_BASIS; setup_ca_basis_size[i] = 4; setup_ca_lambda_min[i] = 0.0; diff --git a/tests/utils/host_utils.h b/tests/utils/host_utils.h index 13aff69174..4355d3f9af 100644 --- a/tests/utils/host_utils.h +++ b/tests/utils/host_utils.h @@ -262,7 +262,15 @@ inline void safe_strcpy(char *cstr, const std::string &str, size_t limit, const } // MG param types -void setMultigridParam(QudaMultigridParam &mg_param); + +/** + @brief Set multigrid parameters that are specific to Wilson-type multigrid +*/ +void setWilsonMultigridParam(QudaMultigridParam &mg_param); + +/** + @brief Set multigrid parameters that are specific to staggered-type multigrid +*/ void setStaggeredMultigridParam(QudaMultigridParam &mg_param); // Eig param types diff --git a/tests/utils/set_params.cpp b/tests/utils/set_params.cpp index 76439d52af..978634582f 100644 --- a/tests/utils/set_params.cpp +++ b/tests/utils/set_params.cpp @@ -344,6 +344,7 @@ void setEigParam(QudaEigParam &eig_param) eig_param.struct_size = sizeof(eig_param); } + void setMultigridParam(QudaMultigridParam &mg_param) { QudaInvertParam &inv_param = *mg_param.invert_param; // this will be used to setup SolverParam parent in MGParam class @@ -361,60 +362,21 @@ void setMultigridParam(QudaMultigridParam &mg_param) inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; inv_param.dirac_order = QUDA_DIRAC_ORDER; - if (kappa == -1.0) { - inv_param.mass = mass; - inv_param.kappa = 1.0 / (2.0 * (1 + 3 / anisotropy + mass)); - } else { - inv_param.kappa = kappa; - inv_param.mass = 0.5 / kappa - (1 + 3 / anisotropy); - } - - if (dslash_type == QUDA_CLOVER_WILSON_DSLASH || dslash_type == QUDA_TWISTED_CLOVER_DSLASH - || dslash_type == QUDA_CLOVER_HASENBUSCH_TWIST_DSLASH) { - inv_param.clover_cpu_prec = cpu_prec; - inv_param.clover_cuda_prec = cuda_prec; - inv_param.clover_cuda_prec_sloppy = cuda_prec_sloppy; - inv_param.clover_cuda_prec_precondition = cuda_prec_precondition; - inv_param.clover_cuda_prec_eigensolver = cuda_prec_eigensolver; - inv_param.clover_cuda_prec_refinement_sloppy = cuda_prec_refinement_sloppy; - inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER; - // Use kappa * csw or supplied clover_coeff - inv_param.clover_csw = clover_csw; - if (clover_coeff == 0.0) { - inv_param.clover_coeff = clover_csw * inv_param.kappa; - } else { - inv_param.clover_coeff = clover_coeff; - } - inv_param.compute_clover_trlog = compute_clover_trlog ? 1 : 0; - } - inv_param.input_location = QUDA_CPU_FIELD_LOCATION; inv_param.output_location = QUDA_CPU_FIELD_LOCATION; inv_param.dslash_type = dslash_type; - if (dslash_type == QUDA_TWISTED_MASS_DSLASH || dslash_type == QUDA_TWISTED_CLOVER_DSLASH) { - inv_param.mu = mu; - inv_param.epsilon = epsilon; - inv_param.twist_flavor = twist_flavor; - inv_param.Ls = (inv_param.twist_flavor == QUDA_TWIST_NONDEG_DOUBLET) ? 2 : 1; - - if (twist_flavor == QUDA_TWIST_NONDEG_DOUBLET) { - printfQuda("Twisted-mass doublet non supported (yet)\n"); - exit(0); - } - } - inv_param.dagger = QUDA_DAG_NO; - inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION; - inv_param.matpc_type = matpc_type; inv_param.solution_type = QUDA_MAT_SOLUTION; inv_param.solve_type = QUDA_DIRECT_SOLVE; - mg_param.invert_param = &inv_param; + mg_param.use_mma = mg_use_mma ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + mg_param.n_level = mg_levels; + for (int i = 0; i < mg_param.n_level; i++) { for (int j = 0; j < 4; j++) { // if not defined use 4 @@ -428,19 +390,33 @@ void setMultigridParam(QudaMultigridParam &mg_param) mg_param.setup_tol[i] = setup_tol[i]; mg_param.setup_maxiter[i] = setup_maxiter[i]; mg_param.setup_maxiter_refresh[i] = setup_maxiter_refresh[i]; + mg_param.setup_maxiter_inverse_iterations_refinement[i] = setup_maxiter_inverse_iterations_refinement[i]; + + // Setup type to use (inverse iterations, chebyshev filter, eigenvectors, restriction, free field) + mg_param.setup_type[i] = setup_type[i]; + + // Setup type to use to generate remaining near-null vectors when some are restricted + mg_param.setup_restrict_remaining_type[i] = setup_restrict_remaining_type[i]; // Basis to use for CA solver setups mg_param.setup_ca_basis[i] = setup_ca_basis[i]; - // Basis size for CA solver setup + // Basis size for CA solver setups mg_param.setup_ca_basis_size[i] = setup_ca_basis_size[i]; // Minimum and maximum eigenvalue for Chebyshev CA basis setup mg_param.setup_ca_lambda_min[i] = setup_ca_lambda_min[i]; mg_param.setup_ca_lambda_max[i] = setup_ca_lambda_max[i]; + // Parameters for Chebyshev filter multigrid setup + mg_param.filter_startup_vectors[i] = filter_startup_vectors[i]; + mg_param.filter_startup_iterations[i] = filter_startup_iterations[i]; + mg_param.filter_startup_rescale_frequency[i] = filter_startup_rescale_frequency[i]; + mg_param.filter_iterations_between_vectors[i] = filter_iterations_between_vectors[i]; + mg_param.filter_lambda_min[i] = filter_lambda_min[i]; + mg_param.filter_lambda_max[i] = filter_lambda_max[i]; + mg_param.spin_block_size[i] = 1; - mg_param.n_vec[i] = nvec[i] == 0 ? 24 : nvec[i]; // default to 24 vectors if not set mg_param.n_block_ortho[i] = n_block_ortho[i]; // number of times to Gram-Schmidt mg_param.block_ortho_two_pass[i] = block_ortho_two_pass[i] ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; // whether to use a two-pass block ortho @@ -452,9 +428,6 @@ void setMultigridParam(QudaMultigridParam &mg_param) mg_param.cycle_type[i] = QUDA_MG_CYCLE_RECURSIVE; - // Is not a staggered solve, always aggregate - mg_param.transfer_type[i] = QUDA_TRANSFER_AGGREGATE; - // set the coarse solver wrappers including bottom solver mg_param.coarse_solver[i] = coarse_solver[i]; mg_param.coarse_solver_tol[i] = coarse_solver_tol[i]; @@ -495,7 +468,7 @@ void setMultigridParam(QudaMultigridParam &mg_param) mg_param.smoother_solver_ca_lambda_min[i] = smoother_solver_ca_lambda_min[i]; mg_param.smoother_solver_ca_lambda_max[i] = smoother_solver_ca_lambda_max[i]; - // Set set coarse_grid_solution_type: this defines which linear + // Set set coarse_grid_solution_type: this defines which linear // system we are solving on a given level // * QUDA_MAT_SOLUTION - we are solving the full system and inject // a full field into coarse grid @@ -541,15 +514,15 @@ void setMultigridParam(QudaMultigridParam &mg_param) if (i == 0) { // top-level treatment if (coarse_solve_type[0] != solve_type) - errorQuda("Mismatch between top-level MG solve type %d and outer solve type %d", coarse_solve_type[0], - solve_type); + errorQuda("Mismatch between top-level MG solve type %s and outer solve type %s", + get_solve_str(coarse_solve_type[0]), get_solve_str(solve_type)); if (solve_type == QUDA_DIRECT_SOLVE) { mg_param.coarse_grid_solution_type[i] = QUDA_MAT_SOLUTION; } else if (solve_type == QUDA_DIRECT_PC_SOLVE) { mg_param.coarse_grid_solution_type[i] = QUDA_MATPC_SOLUTION; } else { - errorQuda("Unexpected solve_type = %d\n", solve_type); + errorQuda("Unexpected solve_type = %s\n", get_solve_str(solve_type)); } } else { @@ -559,7 +532,7 @@ void setMultigridParam(QudaMultigridParam &mg_param) } else if (coarse_solve_type[i] == QUDA_DIRECT_PC_SOLVE) { mg_param.coarse_grid_solution_type[i] = QUDA_MATPC_SOLUTION; } else { - errorQuda("Unexpected solve_type = %d\n", coarse_solve_type[i]); + errorQuda("Unexpected solve_type = %s\n", get_solve_str(coarse_solve_type[i])); } } @@ -572,33 +545,13 @@ void setMultigridParam(QudaMultigridParam &mg_param) // whether to run GPU setup but putting temporaries into mapped (slow CPU) memory mg_param.setup_minimize_memory = QUDA_BOOLEAN_FALSE; - // only coarsen the spin on the first restriction - mg_param.spin_block_size[0] = 2; - - mg_param.setup_type = setup_type; mg_param.pre_orthonormalize = pre_orthonormalize ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; mg_param.post_orthonormalize = post_orthonormalize ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - mg_param.compute_null_vector = generate_nullspace ? QUDA_COMPUTE_NULL_VECTOR_YES : QUDA_COMPUTE_NULL_VECTOR_NO; - - mg_param.generate_all_levels = generate_all_levels ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - mg_param.run_verify = verify_results ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; mg_param.run_low_mode_check = low_mode_check ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; mg_param.run_oblique_proj_check = oblique_proj_check ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - mg_param.use_mma = mg_use_mma ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - // Whether or not to use thin restarts in the evolve tests - mg_param.thin_update_only = mg_evolve_thin_updates ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - - // whether or not to let MG coarsening drop improvements - // ex: for asqtad, dropping the long links for aggregation dimensions smaller than 3 - mg_param.allow_truncation = mg_allow_truncation ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - - // whether or not to use the dagger approximation to Xinv, which is X^dagger - mg_param.staggered_kd_dagger_approximation - = mg_staggered_kd_dagger_approximation ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - // set file i/o parameters for (int i = 0; i < mg_param.n_level; i++) { safe_strcpy(mg_param.vec_infile[i], mg_vec_infile[i], 256, "mg_vec_infile[" + std::to_string(i) + "]"); @@ -609,7 +562,12 @@ void setMultigridParam(QudaMultigridParam &mg_param) mg_param.coarse_guess = mg_eig_coarse_guess ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - mg_param.struct_size = sizeof(mg_param); + // Whether or not to use thin restarts in the evolve tests + mg_param.thin_update_only = mg_evolve_thin_updates ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + + // whether or not to let MG coarsening drop improvements + // ex: for asqtad, dropping the long links for aggregation dimensions smaller than 3 + mg_param.allow_truncation = mg_allow_truncation ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; // these need to tbe set for now but are actually ignored by the MG setup // needed to make it pass the initialization test @@ -622,6 +580,70 @@ void setMultigridParam(QudaMultigridParam &mg_param) inv_param.verbosity = verbosity; inv_param.verbosity_precondition = verbosity; + inv_param.struct_size = sizeof(inv_param); +} + +void setWilsonMultigridParam(QudaMultigridParam &mg_param) +{ + QudaInvertParam &inv_param = *mg_param.invert_param; // this will be used to setup SolverParam parent in MGParam class + + setMultigridParam(mg_param); + + if (kappa == -1.0) { + inv_param.mass = mass; + inv_param.kappa = 1.0 / (2.0 * (1 + 3 / anisotropy + mass)); + } else { + inv_param.kappa = kappa; + inv_param.mass = 0.5 / kappa - (1 + 3 / anisotropy); + } + + if (dslash_type == QUDA_CLOVER_WILSON_DSLASH || dslash_type == QUDA_TWISTED_CLOVER_DSLASH + || dslash_type == QUDA_CLOVER_HASENBUSCH_TWIST_DSLASH) { + inv_param.clover_cpu_prec = cpu_prec; + inv_param.clover_cuda_prec = cuda_prec; + inv_param.clover_cuda_prec_sloppy = cuda_prec_sloppy; + inv_param.clover_cuda_prec_precondition = cuda_prec_precondition; + inv_param.clover_cuda_prec_eigensolver = cuda_prec_eigensolver; + inv_param.clover_cuda_prec_refinement_sloppy = cuda_prec_refinement_sloppy; + inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER; + // Use kappa * csw or supplied clover_coeff + inv_param.clover_csw = clover_csw; + if (clover_coeff == 0.0) { + inv_param.clover_coeff = clover_csw * inv_param.kappa; + } else { + inv_param.clover_coeff = clover_coeff; + } + inv_param.compute_clover_trlog = compute_clover_trlog ? 1 : 0; + } + + if (dslash_type == QUDA_TWISTED_MASS_DSLASH || dslash_type == QUDA_TWISTED_CLOVER_DSLASH) { + inv_param.mu = mu; + inv_param.epsilon = epsilon; + inv_param.twist_flavor = twist_flavor; + inv_param.Ls = (inv_param.twist_flavor == QUDA_TWIST_NONDEG_DOUBLET) ? 2 : 1; + + if (twist_flavor == QUDA_TWIST_NONDEG_DOUBLET) { + printfQuda("Twisted-mass doublet non supported (yet)\n"); + exit(0); + } + } + + inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION; + + mg_param.invert_param = &inv_param; + + for (int i = 0; i < mg_param.n_level; i++) { + + mg_param.n_vec[i] = nvec[i] == 0 ? 24 : nvec[i]; // default to 24 vectors if not set + + // Is not a staggered solve, always aggregate + mg_param.transfer_type[i] = QUDA_TRANSFER_AGGREGATE; + + } + + // only coarsen the spin on the first restriction + mg_param.spin_block_size[0] = 2; + // Use kappa * csw or supplied clover_coeff inv_param.clover_csw = clover_csw; if (clover_coeff == 0.0) { @@ -973,246 +995,38 @@ void setStaggeredInvertParam(QudaInvertParam &inv_param) void setStaggeredMultigridParam(QudaMultigridParam &mg_param) { - QudaInvertParam &inv_param = *mg_param.invert_param; // this will be used to setup SolverParam parent in MGParam class - - // Whether or not to use native BLAS LAPACK - inv_param.native_blas_lapack = (native_blas_lapack ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE); - - inv_param.Ls = 1; - - inv_param.cpu_prec = cpu_prec; - inv_param.cuda_prec = cuda_prec; - inv_param.cuda_prec_sloppy = cuda_prec_sloppy; - inv_param.cuda_prec_precondition = cuda_prec_precondition; - inv_param.cuda_prec_eigensolver = cuda_prec_eigensolver; - inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; - inv_param.dirac_order = QUDA_DIRAC_ORDER; - inv_param.input_location = QUDA_CPU_FIELD_LOCATION; - inv_param.output_location = QUDA_CPU_FIELD_LOCATION; + QudaInvertParam &inv_param = *mg_param.invert_param; // this will be used to setup SolverParam parent in MGParam class - inv_param.dslash_type = dslash_type; + setMultigridParam(mg_param); inv_param.mass = mass; inv_param.kappa = 1.0 / (2.0 * (4.0 + inv_param.mass)); - inv_param.dagger = QUDA_DAG_NO; inv_param.mass_normalization = QUDA_MASS_NORMALIZATION; - inv_param.matpc_type = matpc_type; - inv_param.solution_type = QUDA_MAT_SOLUTION; - - inv_param.solve_type = QUDA_DIRECT_SOLVE; - - mg_param.use_mma = mg_use_mma ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - - // whether or not to allow dropping the long links for aggregation dimensions smaller than 3 - mg_param.allow_truncation = mg_allow_truncation ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - - // whether or not to use the dagger approximation to Xinv, which is X^dagger - mg_param.staggered_kd_dagger_approximation - = mg_staggered_kd_dagger_approximation ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - mg_param.invert_param = &inv_param; - mg_param.n_level = mg_levels; + for (int i = 0; i < mg_param.n_level; i++) { - for (int j = 0; j < 4; j++) { - // if not defined use 4 - mg_param.geo_block_size[i][j] = geo_block_size[i][j] ? geo_block_size[i][j] : 4; - } - mg_param.use_eig_solver[i] = mg_eig[i] ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - mg_param.verbosity[i] = mg_verbosity[i]; - mg_param.setup_inv_type[i] = setup_inv[i]; - mg_param.num_setup_iter[i] = num_setup_iter[i]; - mg_param.setup_tol[i] = setup_tol[i]; - mg_param.setup_maxiter[i] = setup_maxiter[i]; - - // Basis to use for CA solver setups - mg_param.setup_ca_basis[i] = setup_ca_basis[i]; - // Basis size for CA solver setups - mg_param.setup_ca_basis_size[i] = setup_ca_basis_size[i]; - - // Minimum and maximum eigenvalue for Chebyshev CA basis setup - mg_param.setup_ca_lambda_min[i] = setup_ca_lambda_min[i]; - mg_param.setup_ca_lambda_max[i] = setup_ca_lambda_max[i]; - - mg_param.spin_block_size[i] = 1; mg_param.n_vec[i] = nvec[i] == 0 ? 64 : nvec[i]; // default to 64 vectors if not set - mg_param.n_block_ortho[i] = n_block_ortho[i]; // number of times to Gram-Schmidt - mg_param.block_ortho_two_pass[i] - = block_ortho_two_pass[i] ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; // whether to use a two-pass block ortho - mg_param.precision_null[i] = prec_null; // precision to store the null-space basis - mg_param.smoother_halo_precision[i] = smoother_halo_prec; // precision of the halo exchange in the smoother - mg_param.nu_pre[i] = nu_pre[i]; - mg_param.nu_post[i] = nu_post[i]; - mg_param.mu_factor[i] = mu_factor[i]; mg_param.transfer_type[i] = (i == 0) ? staggered_transfer_type : QUDA_TRANSFER_AGGREGATE; - mg_param.cycle_type[i] = QUDA_MG_CYCLE_RECURSIVE; - - // set the coarse solver wrappers including bottom solver - mg_param.coarse_solver[i] = coarse_solver[i]; - mg_param.coarse_solver_tol[i] = coarse_solver_tol[i]; - mg_param.coarse_solver_maxiter[i] = coarse_solver_maxiter[i]; - - // Basis to use for CA coarse solvers - mg_param.coarse_solver_ca_basis[i] = coarse_solver_ca_basis[i]; - - // Basis size for CA coarse solvers - mg_param.coarse_solver_ca_basis_size[i] = coarse_solver_ca_basis_size[i]; - - // Minimum and maximum eigenvalue for Chebyshev CA basis - mg_param.coarse_solver_ca_lambda_min[i] = coarse_solver_ca_lambda_min[i]; - mg_param.coarse_solver_ca_lambda_max[i] = coarse_solver_ca_lambda_max[i]; - - mg_param.smoother[i] = smoother_type[i]; - - // set the smoother / bottom solver tolerance (for MR smoothing this will be ignored) - mg_param.smoother_tol[i] = smoother_tol[i]; - - // set to QUDA_DIRECT_SOLVE for no even/odd preconditioning on the smoother - // set to QUDA_DIRECT_PC_SOLVE for to enable even/odd preconditioning on the smoother - mg_param.smoother_solve_type[i] = smoother_solve_type[i]; - - // set to QUDA_ADDITIVE_SCHWARZ for Additive Schwarz precondioned smoother (presently only impelemented for MR) - mg_param.smoother_schwarz_type[i] = mg_schwarz_type[i]; - - // if using Schwarz preconditioning then use local reductions only - mg_param.global_reduction[i] = (mg_schwarz_type[i] == QUDA_INVALID_SCHWARZ) ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - - // set number of Schwarz cycles to apply - mg_param.smoother_schwarz_cycle[i] = mg_schwarz_cycle[i]; - - // Basis to use for CA smoothers - mg_param.smoother_solver_ca_basis[i] = smoother_solver_ca_basis[i]; - - // Minimum and maximum eigenvalue for Chebyshev CA basis smoothers - mg_param.smoother_solver_ca_lambda_min[i] = smoother_solver_ca_lambda_min[i]; - mg_param.smoother_solver_ca_lambda_max[i] = smoother_solver_ca_lambda_max[i]; - - // Set set coarse_grid_solution_type: this defines which linear - // system we are solving on a given level - // * QUDA_MAT_SOLUTION - we are solving the full system and inject - // a full field into coarse grid - // * QUDA_MATPC_SOLUTION - we are solving the e/o-preconditioned - // system, and only inject single parity field into coarse grid - // - // Multiple possible scenarios here - // - // 1. **Direct outer solver and direct smoother**: here we use - // full-field residual coarsening, and everything involves the - // full system so coarse_grid_solution_type = QUDA_MAT_SOLUTION - // - // 2. **Direct outer solver and preconditioned smoother**: here, - // only the smoothing uses e/o preconditioning, so - // coarse_grid_solution_type = QUDA_MAT_SOLUTION_TYPE. - // We reconstruct the full residual prior to coarsening after the - // pre-smoother, and then need to project the solution for post - // smoothing. - // - // 3. **Preconditioned outer solver and preconditioned smoother**: - // here we use single-parity residual coarsening throughout, so - // coarse_grid_solution_type = QUDA_MATPC_SOLUTION. This is a bit - // questionable from a theoretical point of view, since we don't - // coarsen the preconditioned operator directly, rather we coarsen - // the full operator and preconditioned that, but it just works. - // This is the optimal combination in general for Wilson-type - // operators: although there is an occasional increase in - // iteration or two), by working completely in the preconditioned - // space, we save the cost of reconstructing the full residual - // from the preconditioned smoother, and re-projecting for the - // subsequent smoother, as well as reducing the cost of the - // ancillary blas operations in the coarse-grid solve. - // - // Note, we cannot use preconditioned outer solve with direct - // smoother - // - // Finally, we have to treat the top level carefully: for all - // other levels the entry into and out of the grid will be a - // full-field, which we can then work in Schur complement space or - // not (e.g., freedom to choose coarse_grid_solution_type). For - // the top level, if the outer solver is for the preconditioned - // system, then we must use preconditoning, e.g., option 3.) above. - - if (i == 0) { // top-level treatment - if (coarse_solve_type[0] != solve_type) - errorQuda("Mismatch between top-level MG solve type %s and outer solve type %s", - get_solve_str(coarse_solve_type[0]), get_solve_str(solve_type)); - - if (solve_type == QUDA_DIRECT_SOLVE) { - mg_param.coarse_grid_solution_type[i] = QUDA_MAT_SOLUTION; - } else if (solve_type == QUDA_DIRECT_PC_SOLVE) { - mg_param.coarse_grid_solution_type[i] = QUDA_MATPC_SOLUTION; - } else { - errorQuda("Unexpected solve_type = %s\n", get_solve_str(solve_type)); - } - - } else { - - if (coarse_solve_type[i] == QUDA_DIRECT_SOLVE) { - mg_param.coarse_grid_solution_type[i] = QUDA_MAT_SOLUTION; - } else if (coarse_solve_type[i] == QUDA_DIRECT_PC_SOLVE) { - mg_param.coarse_grid_solution_type[i] = QUDA_MATPC_SOLUTION; - } else { - errorQuda("Unexpected solve_type = %s\n", get_solve_str(coarse_solve_type[i])); - } - } - - mg_param.omega[i] = omega; // over/under relaxation factor - - mg_param.location[i] = solver_location[i]; - mg_param.setup_location[i] = setup_location[i]; - nu_pre[i] = 2; - nu_post[i] = 2; } - // whether to run GPU setup but putting temporaries into mapped (slow CPU) memory - mg_param.setup_minimize_memory = QUDA_BOOLEAN_FALSE; - // coarsening the spin on the first restriction is undefined for staggered fields. mg_param.spin_block_size[0] = 0; + // whether or not to use the dagger approximation to Xinv, which is X^dagger + mg_param.staggered_kd_dagger_approximation + = mg_staggered_kd_dagger_approximation ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; + if (staggered_transfer_type == QUDA_TRANSFER_OPTIMIZED_KD || staggered_transfer_type == QUDA_TRANSFER_OPTIMIZED_KD_DROP_LONG) { mg_param.spin_block_size[1] = 0; // we're coarsening the optimized KD op } - mg_param.setup_type = setup_type; - mg_param.pre_orthonormalize = pre_orthonormalize ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - mg_param.post_orthonormalize = post_orthonormalize ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - - mg_param.compute_null_vector = generate_nullspace ? QUDA_COMPUTE_NULL_VECTOR_YES : QUDA_COMPUTE_NULL_VECTOR_NO; - - mg_param.generate_all_levels = generate_all_levels ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - - mg_param.run_verify = verify_results ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - mg_param.run_low_mode_check = low_mode_check ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - mg_param.run_oblique_proj_check = oblique_proj_check ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - - // set file i/o parameters - for (int i = 0; i < mg_param.n_level; i++) { - safe_strcpy(mg_param.vec_infile[i], mg_vec_infile[i], 256, "mg_vec_infile[" + std::to_string(i) + "]"); - safe_strcpy(mg_param.vec_outfile[i], mg_vec_outfile[i], 256, "mg_vec_outfile[" + std::to_string(i) + "]"); - if (mg_vec_infile[i].size() > 0) mg_param.vec_load[i] = QUDA_BOOLEAN_TRUE; - if (mg_vec_outfile[i].size() > 0) mg_param.vec_store[i] = QUDA_BOOLEAN_TRUE; - } - - mg_param.coarse_guess = mg_eig_coarse_guess ? QUDA_BOOLEAN_TRUE : QUDA_BOOLEAN_FALSE; - - // these need to tbe set for now but are actually ignored by the MG setup - // needed to make it pass the initialization test - inv_param.inv_type = QUDA_GCR_INVERTER; - inv_param.tol = 1e-10; - inv_param.maxiter = 1000; - inv_param.reliable_delta = reliable_delta; - inv_param.gcrNkrylov = 10; - - inv_param.verbosity = verbosity; - inv_param.verbosity_precondition = verbosity; - - inv_param.struct_size = sizeof(inv_param); } void setDeflatedInvertParam(QudaInvertParam &inv_param)