Skip to content

Commit bc6ef2a

Browse files
authored
Merge pull request #1752 from su2code/vectorize_when_possible
Always use vectorization when the numerical scheme supports it
2 parents d32ccec + c9af050 commit bc6ef2a

File tree

58 files changed

+358
-297
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+358
-297
lines changed

Common/doc/docmain.hpp

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
/*!
2929
* \mainpage SU2 version 7.4.0 "Blackbird"
3030
* SU2 suite is an open-source collection of C++ based software tools
31-
* to perform PDE analysis and PDE constrained optimization problems. The toolset is designed with
31+
* to perform PDE analysis and PDE constrained optimization. The toolset is designed with
3232
* computational fluid dynamics and aerodynamic shape optimization in mind, but is extensible to
3333
* include other families of governing equations such as potential flow, electrodynamics, chemically reacting
3434
* flows, and many others. SU2 is released under an
@@ -38,54 +38,53 @@
3838
*/
3939

4040
/*!
41-
* \defgroup Config Descriptions of Configuration Options.
41+
* \defgroup Config Description of the Configuration Options
4242
* \brief Group of variables that can be set using the configuration file.
4343
*/
4444

4545
/*!
46-
* \defgroup ConvDiscr Discretization of the convective terms.
46+
* \defgroup ConvDiscr Discretization of the convective terms
4747
* \brief Group of classes which define the numerical methods for
4848
* discretizing the convective terms of a Partial Differential Equation.
4949
* There are methods for solving the direct, adjoint and linearized
5050
* systems of equations.
5151
*/
5252

5353
/*!
54-
* \defgroup ViscDiscr Discretization of the viscous terms.
54+
* \defgroup ViscDiscr Discretization of the viscous terms
5555
* \brief Group of classes which define the numerical methods for
5656
* discretizing the viscous terms of a Partial Differential Equation.
5757
* There are methods for solving the direct, adjoint and linearized
5858
* systems of equations.
5959
*/
6060

6161
/*!
62-
* \defgroup SourceDiscr Discretization of the source terms.
62+
* \defgroup SourceDiscr Discretization of the source terms
6363
* \brief Group of classes which define the numerical methods for
6464
* discretizing the source terms of a Partial Differential Equation.
6565
* There are methods for solving the direct, adjoint and linearized
6666
* systems of equations.
6767
*/
6868

6969
/*!
70-
* \defgroup Potential_Flow_Equation Solving the potential flow equation.
71-
* \brief Group of classes which define the system of Potential flow equation in
72-
* three formulations: direct, adjoint, and linearized.
73-
*/
74-
75-
/*!
76-
* \defgroup Euler_Equations Solving the Euler's equations.
70+
* \defgroup Euler_Equations Solving the Euler equations
7771
* \brief Group of classes which define the system of Euler equations in
7872
* three formulations: direct, adjoint, and linearized.
7973
*/
8074

8175
/*!
82-
* \defgroup Navier_Stokes_Equations Solving the Navier-Stokes' equations.
76+
* \defgroup Navier_Stokes_Equations Solving the Navier-Stokes equations
8377
* \brief Group of classes which define the system of Navier-Stokes equations in
8478
* three formulations: direct, adjoint, and linearized.
8579
*/
8680

8781
/*!
88-
* \defgroup Turbulence_Model Solving the turbulence models.
82+
* \defgroup Turbulence_Model Solving the turbulence model equations
8983
* \brief Group of classes which define the turbulence model in
9084
* three formulations: direct, adjoint, and linearized.
9185
*/
86+
87+
/*!
88+
* \defgroup Elasticity_Equations Solving the elasticity equations
89+
* \brief Group of classes to solve solid deformation problems.
90+
*/

Common/include/basic_types/datatype_structure.hpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,21 @@ namespace SU2_TYPE {
114114
FORCEINLINE void SetDerivative(su2double &, const passivedouble &) {}
115115
#endif
116116

117+
/*!
118+
* \brief Get the passive value of any variable. Most types are returned unchanged;
119+
* the su2double specialization calls GetValue instead.
120+
* \note This is a struct instead of a function because the return type of the
121+
* su2double specialization changes.
122+
*/
123+
template <class T>
124+
struct Passive {
125+
FORCEINLINE static T Value(const T& val) {return val;}
126+
};
127+
template <>
128+
struct Passive<su2double> {
129+
FORCEINLINE static passivedouble Value(const su2double& val) {return GetValue(val);}
130+
};
131+
117132
/*!
118133
* \brief Casts the primitive value to int (uses GetValue, already implemented for each type).
119134
* \param[in] data - The non-primitive datatype.

Common/include/linear_algebra/vector_expressions.hpp

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <cassert>
3434
#include <cstdlib>
3535
#include <cmath>
36+
#include <cstdint>
3637

3738
namespace VecExpr {
3839

@@ -157,21 +158,28 @@ FORCEINLINE auto FUN(decay_t<S> u, const CVecExpr<V,S>& v) \
157158
RETURNS( EXPR<Bcast<S>,V,S>(Bcast<S>(u), v.derived()) \
158159
) \
159160

160-
/*--- std::max/min have issues (maybe because they return by reference).
161-
* For AD codi::max/min need to be used to avoid issues in debug builds. ---*/
162-
163-
#if defined(CODI_REVERSE_TYPE) || defined(CODI_FORWARD_TYPE)
164-
#define max_impl math::max
165-
#define min_impl math::min
166-
#else
167-
#define max_impl(a,b) a<b? Scalar(b) : Scalar(a)
168-
#define min_impl(a,b) b<a? Scalar(b) : Scalar(a)
169-
#endif
170-
MAKE_BINARY_FUN(max, max_, max_impl)
171-
MAKE_BINARY_FUN(min, min_, min_impl)
161+
/*--- std::max/min have issues (because they return by reference).
162+
* fmin and fmax return by value and thus are fine, but they would force
163+
* conversions to double; to avoid that, we provide integer overloads.
164+
* We use int32/64 instead of int/long to avoid issues with Windows,
165+
* where long is 32 bits (instead of 64 bits). ---*/
166+
167+
#define MAKE_FMINMAX_OVERLOADS(TYPE) \
168+
FORCEINLINE TYPE fmax(TYPE a, TYPE b) { return a<b? b : a; } \
169+
FORCEINLINE TYPE fmin(TYPE a, TYPE b) { return a<b? a : b; }
170+
MAKE_FMINMAX_OVERLOADS(int32_t)
171+
MAKE_FMINMAX_OVERLOADS(int64_t)
172+
MAKE_FMINMAX_OVERLOADS(uint32_t)
173+
MAKE_FMINMAX_OVERLOADS(uint64_t)
174+
/*--- Make the float and double versions of fmin/max available in this
175+
* namespace to avoid ambiguous overloads. ---*/
176+
using std::fmax;
177+
using std::fmin;
178+
#undef MAKE_FMINMAX_OVERLOADS
179+
180+
MAKE_BINARY_FUN(fmax, max_, fmax)
181+
MAKE_BINARY_FUN(fmin, min_, fmin)
172182
MAKE_BINARY_FUN(pow, pow_, math::pow)
173-
#undef max_impl
174-
#undef min_impl
175183

176184
/*--- std::plus and co. were tried, the code was horrendous (due to the forced
177185
* conversion between different types) and creating functions for these ops
@@ -190,20 +198,25 @@ MAKE_BINARY_FUN(operator/, div_, div_impl)
190198
#undef mul_impl
191199
#undef div_impl
192200

193-
/*--- Relational operators need to be cast to the scalar type to allow vectorization. ---*/
194-
195-
#define le_impl(a,b) Scalar(a<=b)
196-
#define ge_impl(a,b) Scalar(a>=b)
197-
#define eq_impl(a,b) Scalar(a==b)
198-
#define ne_impl(a,b) Scalar(a!=b)
199-
#define lt_impl(a,b) Scalar(a<b)
200-
#define gt_impl(a,b) Scalar(a>b)
201+
/*--- Relational operators need to be cast to the scalar type to allow vectorization.
202+
* TO_PASSIVE is used to convert active scalars to passive, which CoDi will then capture
203+
* by value in its expressions, and thus dangling references are avoided. No AD info
204+
* is lost since these operators are non-differentiable. ---*/
205+
206+
#define TO_PASSIVE(IMPL) SU2_TYPE::Passive<Scalar>::Value(IMPL)
207+
#define le_impl(a,b) TO_PASSIVE(a<=b)
208+
#define ge_impl(a,b) TO_PASSIVE(a>=b)
209+
#define eq_impl(a,b) TO_PASSIVE(a==b)
210+
#define ne_impl(a,b) TO_PASSIVE(a!=b)
211+
#define lt_impl(a,b) TO_PASSIVE(a<b)
212+
#define gt_impl(a,b) TO_PASSIVE(a>b)
201213
MAKE_BINARY_FUN(operator<=, le_, le_impl)
202214
MAKE_BINARY_FUN(operator>=, ge_, ge_impl)
203215
MAKE_BINARY_FUN(operator==, eq_, eq_impl)
204216
MAKE_BINARY_FUN(operator!=, ne_, ne_impl)
205217
MAKE_BINARY_FUN(operator<, lt_, lt_impl)
206218
MAKE_BINARY_FUN(operator>, gt_, gt_impl)
219+
#undef TO_PASSIVE
207220
#undef le_impl
208221
#undef ge_impl
209222
#undef eq_impl

Common/include/parallelization/special_vectorization.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,8 @@ MAKE_BINARY_FUN(operator==, eq_p)
126126
MAKE_BINARY_FUN(operator!=, ne_p)
127127
MAKE_BINARY_FUN(operator<=, le_p)
128128
MAKE_BINARY_FUN(operator>=, ge_p)
129-
MAKE_BINARY_FUN(max, max_p)
130-
MAKE_BINARY_FUN(min, min_p)
129+
MAKE_BINARY_FUN(fmax, max_p)
130+
MAKE_BINARY_FUN(fmin, min_p)
131131

132132
#undef MAKE_BINARY_FUN
133133

Common/include/parallelization/vectorization.hpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,17 @@ template<class T>
5858
constexpr size_t preferredLen() { return PREFERRED_SIZE / sizeof(T); }
5959

6060
template<>
61-
constexpr size_t preferredLen<su2double>() { return PREFERRED_SIZE / sizeof(passivedouble); }
61+
constexpr size_t preferredLen<su2double>() {
62+
#ifdef CODI_REVERSE_TYPE
63+
/*--- Use a SIMD size of 1 for reverse AD, larger sizes increase
64+
* the pre-accumulation time with no performance benefit. ---*/
65+
return 1;
66+
#else
67+
/*--- For forward AD there is a performance benefit. This covers
68+
* forward AD and primal mode (su2double == passivedouble). ---*/
69+
return PREFERRED_SIZE / sizeof(passivedouble);
70+
#endif
71+
}
6272

6373
/*!
6474
* \class Array

SU2_CFD/include/numerics/elasticity/CFEAElasticity.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,11 @@
3232

3333
/*!
3434
* \class CFEAElasticity
35+
* \ingroup Elasticity_Equations
3536
* \brief Abstract class for computing the tangent matrix and the residual for structural problems.
3637
* \note At the next level of abstraction (linear or not) a class must define the constitutive term.
3738
* The methods we override in this class with an empty implementation are here just to better
3839
* document the public interface of this class hierarchy.
39-
* \ingroup FEM_Discr
4040
* \author R.Sanchez
4141
* \version 7.4.0 "Blackbird"
4242
*/

SU2_CFD/include/numerics/elasticity/CFEALinearElasticity.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
/*!
3434
* \class CFEALinearElasticity
3535
* \brief Class for computing the stiffness matrix of a linear, elastic problem.
36-
* \ingroup FEM_Discr
36+
* \ingroup Elasticity_Equations
3737
* \author R.Sanchez
3838
* \version 7.4.0 "Blackbird"
3939
*/
@@ -88,7 +88,7 @@ class CFEALinearElasticity : public CFEAElasticity {
8888
/*!
8989
* \class CFEAMeshElasticity
9090
* \brief Particular case of linear elasticity used for mesh deformation.
91-
* \ingroup FEM_Discr
91+
* \ingroup Elasticity_Equations
9292
* \author R.Sanchez
9393
* \version 7.4.0 "Blackbird"
9494
*/

SU2_CFD/include/numerics/elasticity/CFEANonlinearElasticity.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
* This class does not implement a particular model, that will be done by its children.
3636
* \note In addition to Compute_Constitutive_Matrix, derived classes MUST further implement
3737
* Compute_Plane_Stress_Term and Compute_Stress_Tensor.
38-
* \ingroup FEM_Discr
38+
* \ingroup Elasticity_Equations
3939
* \author R.Sanchez
4040
* \version 7.4.0 "Blackbird"
4141
*/

SU2_CFD/include/numerics/elasticity/nonlinear_models.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
/*!
3434
* \class CFEM_NeoHookean_Comp
3535
* \brief Class for computing the constitutive and stress tensors for a neo-Hookean material model, compressible.
36-
* \ingroup FEM_Discr
36+
* \ingroup Elasticity_Equations
3737
* \author R.Sanchez
3838
* \version 7.4.0 "Blackbird"
3939
*/
@@ -81,7 +81,7 @@ class CFEM_NeoHookean_Comp final : public CFEANonlinearElasticity {
8181
/*!
8282
* \class CFEM_Knowles_NearInc
8383
* \brief Constitutive and stress tensors for a Knowles stored-energy function, nearly incompressible.
84-
* \ingroup FEM_Discr
84+
* \ingroup Elasticity_Equations
8585
* \author R.Sanchez
8686
* \version 7.4.0 "Blackbird"
8787
*/
@@ -132,7 +132,7 @@ class CFEM_Knowles_NearInc final : public CFEANonlinearElasticity {
132132
/*!
133133
* \class CFEM_DielectricElastomer
134134
* \brief Class for computing the constitutive and stress tensors for a dielectric elastomer.
135-
* \ingroup FEM_Discr
135+
* \ingroup Elasticity_Equations
136136
* \author R.Sanchez
137137
* \version 7.4.0 "Blackbird"
138138
*/
@@ -180,7 +180,7 @@ class CFEM_DielectricElastomer final : public CFEANonlinearElasticity {
180180
/*!
181181
* \class CFEM_IdealDE
182182
* \brief Class for computing the constitutive and stress tensors for a nearly-incompressible ideal DE.
183-
* \ingroup FEM_Discr
183+
* \ingroup Elasticity_Equations
184184
* \author R.Sanchez
185185
* \version 7.4.0 "Blackbird"
186186
*/

SU2_CFD/include/numerics_simd/CNumericsSIMD.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,12 @@ CNumericsSIMD* createNumerics(const CConfig& config, int iMesh, const CVariable*
130130
* numerical methods.
131131
*/
132132
CNumericsSIMD* CNumericsSIMD::CreateNumerics(const CConfig& config, int nDim, int iMesh, const CVariable* turbVars) {
133+
#ifndef CODI_REVERSE_TYPE
133134
if ((Double::Size < 4) && (SU2_MPI::GetRank() == MASTER_NODE)) {
134-
cout << "WARNING: SU2 was not compiled for an AVX-capable architecture." << endl;
135+
cout << "WARNING: SU2 was not compiled for an AVX-capable architecture. Performance could be better,\n"
136+
" see https://su2code.github.io/docs_v7/Build-SU2-Linux-MacOS/#compiler-optimizations" << endl;
135137
}
138+
#endif
136139
if (nDim == 2) return createNumerics<2>(config, iMesh, turbVars);
137140
if (nDim == 3) return createNumerics<3>(config, iMesh, turbVars);
138141

0 commit comments

Comments
 (0)