From 839fead2644cedafb8e7af823e154a77ac63609c Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 11 Jun 2026 15:27:45 -0400 Subject: [PATCH 1/7] bench: add per-solver Riemann benchmark variants --- benchmarks/5eq_rk3_weno3_hll/case.py | 279 +++++++++++++++++++++++++++ benchmarks/5eq_rk3_weno3_lf/case.py | 279 +++++++++++++++++++++++++++ toolchain/bench.yaml | 20 ++ 3 files changed, 578 insertions(+) create mode 100644 benchmarks/5eq_rk3_weno3_hll/case.py create mode 100644 benchmarks/5eq_rk3_weno3_lf/case.py diff --git a/benchmarks/5eq_rk3_weno3_hll/case.py b/benchmarks/5eq_rk3_weno3_hll/case.py new file mode 100644 index 0000000000..1c7558edb7 --- /dev/null +++ b/benchmarks/5eq_rk3_weno3_hll/case.py @@ -0,0 +1,279 @@ +#!/usr/bin/env python3 +# Benchmark model_equations_2_time_stepper_3_weno_order_3_riemann_solver_hll +# Additional Benchmarked Features +# - model_equations : 2 +# - time_stepper : 3 +# - weno_order : 3 +# - riemann_solver : hll + +import argparse +import json +import math + +parser = argparse.ArgumentParser(prog="Benchmarking Case 1", description="This MFC case was created for the purposes of benchmarking MFC.", formatter_class=argparse.ArgumentDefaultsHelpFormatter) + +parser.add_argument("--mfc", type=json.loads, default="{}", metavar="DICT", help="MFC's toolchain's internal state.") +parser.add_argument("--gbpp", type=int, metavar="MEM", default=16, help="Adjusts the problem size per rank to fit into [MEM] GB of GPU memory per GPU.") +parser.add_argument("--steps", type=int, default=None, help="Override t_step_stop/t_step_save.") + +ARGS = vars(parser.parse_args()) +DICT = ARGS["mfc"] + +size = 1 if DICT["gpu"] else 0 + +ppg = 8000000 / 16.0 +procs = DICT["nodes"] * DICT["tasks_per_node"] +ncells = math.floor(ppg * procs * ARGS["gbpp"]) +s = math.floor((ncells / 2.0) ** (1 / 3)) +Nx, Ny, Nz = 2 * s, s, s + +# athmospheric pressure - Pa (used as reference value) +patm = 101325 + +# Initial Droplet Diameter / Reference length - m +D0 = 1.0e-3 + +# cavity to droplet ratio +CtD = 0.06 + +# cavity relative eccentricity (distance between radii) +ecc = 0.564 + +# initial shock distance from the y axis. Note that the droplet center is located at y = 0. Thus, the distance from the shock to +# the droplet is about D0/8 +ISD = 5.0 / 8 * D0 + +# pre-shock properties - AIR + +# pressure - Pa +p0a = patm + +# density - kg/m3 +rho0a = 1.204 + +# gamma +gama = 1.40 + +# pi infinity - Pa +pia = 0 + +# speed of sound - M/s +c_a = math.sqrt(gama * (p0a + pia) / rho0a) + +# Droplet - WATER + +# surface tension - N / m +st = 0.00e0 + +# Delta Pressure - Pa +DP = -st * 4 / D0 + +# initial pressure inside the droplet - Pa +p0w = p0a - DP + +# density - kg/m3 +rho0w = 1000 + +# gama +gamw = 6.12 + +# pi infty - Pa +piw = 3.43e08 + +# speed of sound - m/s +c_w = math.sqrt(gamw * (p0w + piw) / rho0w) + +# Shock Mach number of interest. Note that the post-shock properties can be defined in terms of either +# Min or psOp0a. Just comment/uncomment appropriately +Min = 2.4 + +# Pos to pre shock ratios - AIR + +# pressure +psOp0a = (Min**2 - 1) * 2 * gama / (gama + 1) + 1 +# psOp0a = 4.5 + +# density +rhosOrho0a = (1 + (gama + 1) / (gama - 1) * psOp0a) / ((gama + 1) / (gama - 1) + psOp0a) + +# Mach number of the shocked region - just a checker, as it must return "Min" +Ms = math.sqrt((gama + 1.0) / (2.0 * gama) * (psOp0a - 1.0) * (p0a / (p0a + pia)) + 1.0) + +# shock speed of sound - m/s +ss = Ms * c_a + +# post-shock - AIR + +# pressure - Pa +ps = psOp0a * p0a + +# density - kg / m3 +rhos = rhosOrho0a * rho0a + +# post shock speed of sound - m/s +c_s = math.sqrt(gama * (ps + pia) / rhos) + +# velocity at the post shock - m/s +vel = c_a / gama * (psOp0a - 1.0) * p0a / (p0a + pia) / Ms + +# Domain boundaries - m + +# x direction +xb = -8.4707 * D0 +xe = 9.6226 * D0 + +# xb = -10 * D0 +# xe = 10 * D0 + +# y direction +yb = 0 * D0 +ye = 10 * D0 + +# y direction +zb = 0 * D0 +ze = 10 * D0 + +# Stretching factor, to make sure the domaing is sufficiently large after the mesh stretch +StF = 4.0 + +# grid delta x if mesh were uniform in x direction - m. Note that I do not need a measure for dy +dx = (xe - xb) / Nx + +# I calculate tend twice; first is an estimate, second is +# the actual value used. This is because I am getting errors in the +# post process part every time I approximate the actual Nt by an integer +# number (think of a smarter way). + +# dimensionless time +ttilde = 1.92 + +# auxiliary simulation physical time - s. This is not YET the total simulation time, as it will be corrected so as to avoid +# mismatches in simulation and post_process parts. Note that I wrote it this way so I have better control over the # of autosaves +tendA = ttilde * D0 / vel + +cfl = 0.1 + +# time-step - s +dt = dx * cfl / ss + +# Save Frequency. Note that the number of autosaves will be SF + 1, as th IC (0.dat) is also saved +SF = 400 + +# making Nt divisible by SF +# 1 - ensure NtA goes slightly beyond tendA +NtA = int(tendA // dt + 1) + +# Array of saves. It is the same as Nt/Sf = t_step_save +AS = int(NtA // SF + 1) + +# Nt = total number of steps. Note that Nt >= NtA (so at least tendA is completely simulated) +Nt = AS * SF + +# total simulation time - s. Note that tend >= tendA +tend = Nt * dt + +# Configuring case dictionary +print( + json.dumps( + { + # Logistics + "run_time_info": "F", + # Computational Domain Parameters + "x_domain%beg": xb, + "x_domain%end": xe, + "y_domain%beg": yb, + "y_domain%end": ye, + "z_domain%beg": zb, + "z_domain%end": ze, + "m": Nx, + "n": Ny, + "p": Nz, + "cyl_coord": "F", + "dt": dt, + "t_step_start": 0, + "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), + "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), + # Simulation Algorithm Parameters + "num_patches": 3, + "model_eqns": 2, + "alt_soundspeed": "F", + "num_fluids": 2, + "mpp_lim": "T", + "mixture_err": "T", + "time_stepper": 3, + "weno_order": 3, + "weno_eps": 1.0e-16, + "weno_Re_flux": "F", + "weno_avg": "F", + "mapped_weno": "T", + "riemann_solver": "hll", + "wave_speeds": 1, + "avg_state": 2, + "bc_x%beg": -6, + "bc_x%end": -6, + "bc_y%beg": -2, + "bc_y%end": -3, + "bc_z%beg": -2, + "bc_z%end": -3, + # Formatted Database Files Structure Parameters + "format": 1, + "precision": 2, + "prim_vars_wrt": "T", + "parallel_io": "F", + # I will use 1 for WATER properties, and 2 for AIR properties + # Patch 1: Background (AIR - 2) + "patch_icpp(1)%geometry": 9, + "patch_icpp(1)%x_centroid": (xb + xe) / 2 * StF, + "patch_icpp(1)%y_centroid": (yb + ye) / 2 * StF, + "patch_icpp(1)%z_centroid": (yb + ye) / 2 * StF, + "patch_icpp(1)%length_x": (xe - xb) * StF, + "patch_icpp(1)%length_y": (ye - yb) * StF, + "patch_icpp(1)%length_z": (ze - zb) * StF, + "patch_icpp(1)%vel(1)": 0.0e00, + "patch_icpp(1)%vel(2)": 0.0e00, + "patch_icpp(1)%vel(3)": 0.0e00, + "patch_icpp(1)%pres": p0a, + "patch_icpp(1)%alpha_rho(1)": 0.0e00, + "patch_icpp(1)%alpha_rho(2)": rho0a, + "patch_icpp(1)%alpha(1)": 0.0e00, + "patch_icpp(1)%alpha(2)": 1.0e00, + # Patch 2: Shocked state (AIR - 2) + "patch_icpp(2)%geometry": 9, + "patch_icpp(2)%alter_patch(1)": "T", + "patch_icpp(2)%x_centroid": -ISD - (xe - xb) / 2 * StF, + "patch_icpp(2)%y_centroid": (yb + ye) / 2 * StF, + "patch_icpp(2)%z_centroid": (zb + ze) / 2 * StF, + "patch_icpp(2)%length_x": (xe - xb) * StF, + "patch_icpp(2)%length_y": (ye - yb) * StF, + "patch_icpp(2)%length_z": (ze - zb) * StF, + "patch_icpp(2)%vel(1)": vel, + "patch_icpp(2)%vel(2)": 0.0e00, + "patch_icpp(2)%vel(3)": 0.0e00, + "patch_icpp(2)%pres": ps, + "patch_icpp(2)%alpha_rho(1)": 0.0e00, + "patch_icpp(2)%alpha_rho(2)": rhos, + "patch_icpp(2)%alpha(1)": 0.0e00, + "patch_icpp(2)%alpha(2)": 1.0e00, + # Patch 3: Droplet (WATER - 1) + "patch_icpp(3)%geometry": 8, + "patch_icpp(3)%x_centroid": 0.0e00, + "patch_icpp(3)%y_centroid": 0.0e00, + "patch_icpp(3)%z_centroid": 0.0e00, + "patch_icpp(3)%radius": D0 / 2, + "patch_icpp(3)%alter_patch(1)": "T", + "patch_icpp(3)%vel(1)": 0.0e00, + "patch_icpp(3)%vel(2)": 0.0e00, + "patch_icpp(3)%vel(3)": 0.0e00, + "patch_icpp(3)%pres": p0w, + "patch_icpp(3)%alpha_rho(1)": rho0w, + "patch_icpp(3)%alpha_rho(2)": 0.0e00, + "patch_icpp(3)%alpha(1)": 1.0e00, + "patch_icpp(3)%alpha(2)": 0.0e00, + # Fluids Physical Parameters + "fluid_pp(1)%gamma": 1.0e00 / (gamw - 1), + "fluid_pp(1)%pi_inf": gamw * piw / (gamw - 1), + "fluid_pp(2)%gamma": 1.0e00 / (gama - 1), + "fluid_pp(2)%pi_inf": gama * pia / (gama - 1), + } + ) +) diff --git a/benchmarks/5eq_rk3_weno3_lf/case.py b/benchmarks/5eq_rk3_weno3_lf/case.py new file mode 100644 index 0000000000..55662ea40d --- /dev/null +++ b/benchmarks/5eq_rk3_weno3_lf/case.py @@ -0,0 +1,279 @@ +#!/usr/bin/env python3 +# Benchmark model_equations_2_time_stepper_3_weno_order_3_riemann_solver_lax_friedrichs +# Additional Benchmarked Features +# - model_equations : 2 +# - time_stepper : 3 +# - weno_order : 3 +# - riemann_solver : lax_friedrichs + +import argparse +import json +import math + +parser = argparse.ArgumentParser(prog="Benchmarking Case 1", description="This MFC case was created for the purposes of benchmarking MFC.", formatter_class=argparse.ArgumentDefaultsHelpFormatter) + +parser.add_argument("--mfc", type=json.loads, default="{}", metavar="DICT", help="MFC's toolchain's internal state.") +parser.add_argument("--gbpp", type=int, metavar="MEM", default=16, help="Adjusts the problem size per rank to fit into [MEM] GB of GPU memory per GPU.") +parser.add_argument("--steps", type=int, default=None, help="Override t_step_stop/t_step_save.") + +ARGS = vars(parser.parse_args()) +DICT = ARGS["mfc"] + +size = 1 if DICT["gpu"] else 0 + +ppg = 8000000 / 16.0 +procs = DICT["nodes"] * DICT["tasks_per_node"] +ncells = math.floor(ppg * procs * ARGS["gbpp"]) +s = math.floor((ncells / 2.0) ** (1 / 3)) +Nx, Ny, Nz = 2 * s, s, s + +# athmospheric pressure - Pa (used as reference value) +patm = 101325 + +# Initial Droplet Diameter / Reference length - m +D0 = 1.0e-3 + +# cavity to droplet ratio +CtD = 0.06 + +# cavity relative eccentricity (distance between radii) +ecc = 0.564 + +# initial shock distance from the y axis. Note that the droplet center is located at y = 0. Thus, the distance from the shock to +# the droplet is about D0/8 +ISD = 5.0 / 8 * D0 + +# pre-shock properties - AIR + +# pressure - Pa +p0a = patm + +# density - kg/m3 +rho0a = 1.204 + +# gamma +gama = 1.40 + +# pi infinity - Pa +pia = 0 + +# speed of sound - M/s +c_a = math.sqrt(gama * (p0a + pia) / rho0a) + +# Droplet - WATER + +# surface tension - N / m +st = 0.00e0 + +# Delta Pressure - Pa +DP = -st * 4 / D0 + +# initial pressure inside the droplet - Pa +p0w = p0a - DP + +# density - kg/m3 +rho0w = 1000 + +# gama +gamw = 6.12 + +# pi infty - Pa +piw = 3.43e08 + +# speed of sound - m/s +c_w = math.sqrt(gamw * (p0w + piw) / rho0w) + +# Shock Mach number of interest. Note that the post-shock properties can be defined in terms of either +# Min or psOp0a. Just comment/uncomment appropriately +Min = 2.4 + +# Pos to pre shock ratios - AIR + +# pressure +psOp0a = (Min**2 - 1) * 2 * gama / (gama + 1) + 1 +# psOp0a = 4.5 + +# density +rhosOrho0a = (1 + (gama + 1) / (gama - 1) * psOp0a) / ((gama + 1) / (gama - 1) + psOp0a) + +# Mach number of the shocked region - just a checker, as it must return "Min" +Ms = math.sqrt((gama + 1.0) / (2.0 * gama) * (psOp0a - 1.0) * (p0a / (p0a + pia)) + 1.0) + +# shock speed of sound - m/s +ss = Ms * c_a + +# post-shock - AIR + +# pressure - Pa +ps = psOp0a * p0a + +# density - kg / m3 +rhos = rhosOrho0a * rho0a + +# post shock speed of sound - m/s +c_s = math.sqrt(gama * (ps + pia) / rhos) + +# velocity at the post shock - m/s +vel = c_a / gama * (psOp0a - 1.0) * p0a / (p0a + pia) / Ms + +# Domain boundaries - m + +# x direction +xb = -8.4707 * D0 +xe = 9.6226 * D0 + +# xb = -10 * D0 +# xe = 10 * D0 + +# y direction +yb = 0 * D0 +ye = 10 * D0 + +# y direction +zb = 0 * D0 +ze = 10 * D0 + +# Stretching factor, to make sure the domaing is sufficiently large after the mesh stretch +StF = 4.0 + +# grid delta x if mesh were uniform in x direction - m. Note that I do not need a measure for dy +dx = (xe - xb) / Nx + +# I calculate tend twice; first is an estimate, second is +# the actual value used. This is because I am getting errors in the +# post process part every time I approximate the actual Nt by an integer +# number (think of a smarter way). + +# dimensionless time +ttilde = 1.92 + +# auxiliary simulation physical time - s. This is not YET the total simulation time, as it will be corrected so as to avoid +# mismatches in simulation and post_process parts. Note that I wrote it this way so I have better control over the # of autosaves +tendA = ttilde * D0 / vel + +cfl = 0.1 + +# time-step - s +dt = dx * cfl / ss + +# Save Frequency. Note that the number of autosaves will be SF + 1, as th IC (0.dat) is also saved +SF = 400 + +# making Nt divisible by SF +# 1 - ensure NtA goes slightly beyond tendA +NtA = int(tendA // dt + 1) + +# Array of saves. It is the same as Nt/Sf = t_step_save +AS = int(NtA // SF + 1) + +# Nt = total number of steps. Note that Nt >= NtA (so at least tendA is completely simulated) +Nt = AS * SF + +# total simulation time - s. Note that tend >= tendA +tend = Nt * dt + +# Configuring case dictionary +print( + json.dumps( + { + # Logistics + "run_time_info": "F", + # Computational Domain Parameters + "x_domain%beg": xb, + "x_domain%end": xe, + "y_domain%beg": yb, + "y_domain%end": ye, + "z_domain%beg": zb, + "z_domain%end": ze, + "m": Nx, + "n": Ny, + "p": Nz, + "cyl_coord": "F", + "dt": dt, + "t_step_start": 0, + "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), + "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), + # Simulation Algorithm Parameters + "num_patches": 3, + "model_eqns": 2, + "alt_soundspeed": "F", + "num_fluids": 2, + "mpp_lim": "T", + "mixture_err": "T", + "time_stepper": 3, + "weno_order": 3, + "weno_eps": 1.0e-16, + "weno_Re_flux": "F", + "weno_avg": "F", + "mapped_weno": "T", + "riemann_solver": "lax_friedrichs", + "wave_speeds": 1, + "avg_state": 2, + "bc_x%beg": -6, + "bc_x%end": -6, + "bc_y%beg": -2, + "bc_y%end": -3, + "bc_z%beg": -2, + "bc_z%end": -3, + # Formatted Database Files Structure Parameters + "format": 1, + "precision": 2, + "prim_vars_wrt": "T", + "parallel_io": "F", + # I will use 1 for WATER properties, and 2 for AIR properties + # Patch 1: Background (AIR - 2) + "patch_icpp(1)%geometry": 9, + "patch_icpp(1)%x_centroid": (xb + xe) / 2 * StF, + "patch_icpp(1)%y_centroid": (yb + ye) / 2 * StF, + "patch_icpp(1)%z_centroid": (yb + ye) / 2 * StF, + "patch_icpp(1)%length_x": (xe - xb) * StF, + "patch_icpp(1)%length_y": (ye - yb) * StF, + "patch_icpp(1)%length_z": (ze - zb) * StF, + "patch_icpp(1)%vel(1)": 0.0e00, + "patch_icpp(1)%vel(2)": 0.0e00, + "patch_icpp(1)%vel(3)": 0.0e00, + "patch_icpp(1)%pres": p0a, + "patch_icpp(1)%alpha_rho(1)": 0.0e00, + "patch_icpp(1)%alpha_rho(2)": rho0a, + "patch_icpp(1)%alpha(1)": 0.0e00, + "patch_icpp(1)%alpha(2)": 1.0e00, + # Patch 2: Shocked state (AIR - 2) + "patch_icpp(2)%geometry": 9, + "patch_icpp(2)%alter_patch(1)": "T", + "patch_icpp(2)%x_centroid": -ISD - (xe - xb) / 2 * StF, + "patch_icpp(2)%y_centroid": (yb + ye) / 2 * StF, + "patch_icpp(2)%z_centroid": (zb + ze) / 2 * StF, + "patch_icpp(2)%length_x": (xe - xb) * StF, + "patch_icpp(2)%length_y": (ye - yb) * StF, + "patch_icpp(2)%length_z": (ze - zb) * StF, + "patch_icpp(2)%vel(1)": vel, + "patch_icpp(2)%vel(2)": 0.0e00, + "patch_icpp(2)%vel(3)": 0.0e00, + "patch_icpp(2)%pres": ps, + "patch_icpp(2)%alpha_rho(1)": 0.0e00, + "patch_icpp(2)%alpha_rho(2)": rhos, + "patch_icpp(2)%alpha(1)": 0.0e00, + "patch_icpp(2)%alpha(2)": 1.0e00, + # Patch 3: Droplet (WATER - 1) + "patch_icpp(3)%geometry": 8, + "patch_icpp(3)%x_centroid": 0.0e00, + "patch_icpp(3)%y_centroid": 0.0e00, + "patch_icpp(3)%z_centroid": 0.0e00, + "patch_icpp(3)%radius": D0 / 2, + "patch_icpp(3)%alter_patch(1)": "T", + "patch_icpp(3)%vel(1)": 0.0e00, + "patch_icpp(3)%vel(2)": 0.0e00, + "patch_icpp(3)%vel(3)": 0.0e00, + "patch_icpp(3)%pres": p0w, + "patch_icpp(3)%alpha_rho(1)": rho0w, + "patch_icpp(3)%alpha_rho(2)": 0.0e00, + "patch_icpp(3)%alpha(1)": 1.0e00, + "patch_icpp(3)%alpha(2)": 0.0e00, + # Fluids Physical Parameters + "fluid_pp(1)%gamma": 1.0e00 / (gamw - 1), + "fluid_pp(1)%pi_inf": gamw * piw / (gamw - 1), + "fluid_pp(2)%gamma": 1.0e00 / (gama - 1), + "fluid_pp(2)%pi_inf": gama * pia / (gama - 1), + } + ) +) diff --git a/toolchain/bench.yaml b/toolchain/bench.yaml index d09d9863e3..617d9138f3 100644 --- a/toolchain/bench.yaml +++ b/toolchain/bench.yaml @@ -8,6 +8,26 @@ path: benchmarks/5eq_rk3_weno3_hllc/case.py args: [] +# Benchmark model_equations_2_time_stepper_3_weno_order_3_riemann_solver_hll +# Additional Benchmarked Features +# - model_equations : 2 +# - time_stepper : 3 +# - weno_order : 3 +# - riemann_solver : hll +- slug: 5eq_rk3_weno3_hll + path: benchmarks/5eq_rk3_weno3_hll/case.py + args: [] + +# Benchmark model_equations_2_time_stepper_3_weno_order_3_riemann_solver_lax_friedrichs +# Additional Benchmarked Features +# - model_equations : 2 +# - time_stepper : 3 +# - weno_order : 3 +# - riemann_solver : lax_friedrichs +- slug: 5eq_rk3_weno3_lf + path: benchmarks/5eq_rk3_weno3_lf/case.py + args: [] + # Benchmark viscosity_weno_Re_flux_T_weno_order_5_bubbles_T_bubble_mode_3_acoustic_T # Additional Benchmarked Features # - viscosity enabled From 6837302513791caf5370506750f51cda3823d75c Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 11 Jun 2026 15:57:25 -0400 Subject: [PATCH 2/7] src: extract shared Riemann interface helpers (mixture, Reynolds, cylindrical source) Adds three GPU_ROUTINE(cray_inline) helpers to m_riemann_state and replaces the duplicated inline blocks in the HLL, HLLC, and LF solvers (HLLD untouched): s_accumulate_mixture_properties (14 call sites: hll 2, lf 2, hllc 10 across the 6-eq/4-eq/5-eq-bubbles/5-eq branches), s_compute_interface_reynolds (10 call sites: hll 2, lf 4, hllc 4), and f_compute_hllc_star_momentum_flux (6 call sites: hllc cylindrical and azimuthal geometric source flux). NOT NFC: calls replace inline arithmetic, so floating-point ordering may shift within golden tolerance. The hllc 5-eq Re_max Reynolds variant is replaced by the shared form: live lanes are arithmetically identical; previously-uninitialized dead lanes (Re_size(i)==0) now get the defined 1/sgm_eps value. The hllc 5-eq branch loads post-limiter alpha into new alpha_lim arrays so alpha_L/R keep their pre-limiter values used downstream. The HLL/LF cylindrical source blocks and the hllc 5-eq-bubbles (1-alpha) Reynolds variant use different arithmetic and stay inline. --- src/simulation/m_riemann_solver_hll.fpp | 49 ++--- src/simulation/m_riemann_solver_hllc.fpp | 236 +++++++++-------------- src/simulation/m_riemann_solver_lf.fpp | 60 +----- src/simulation/m_riemann_state.fpp | 77 ++++++++ 4 files changed, 186 insertions(+), 236 deletions(-) diff --git a/src/simulation/m_riemann_solver_hll.fpp b/src/simulation/m_riemann_solver_hll.fpp index e235a1c0e4..7752f8c813 100644 --- a/src/simulation/m_riemann_solver_hll.fpp +++ b/src/simulation/m_riemann_solver_hll.fpp @@ -89,14 +89,14 @@ contains real(wp) :: vel_L_tmp, vel_R_tmp real(wp) :: Ms_L, Ms_R, pres_SL, pres_SR real(wp) :: alpha_L_sum, alpha_R_sum - real(wp) :: zcoef, pcorr !< low Mach number correction + real(wp) :: zcoef, pcorr !< low Mach number correction type(riemann_states) :: c_fast, pres_mag type(riemann_states_vec3) :: B - type(riemann_states) :: Ga !< Gamma (Lorentz factor) + type(riemann_states) :: Ga !< Gamma (Lorentz factor) type(riemann_states) :: vdotB, B2 - type(riemann_states_vec3) :: b4 !< 4-magnetic field components (spatial: b4x, b4y, b4z) - type(riemann_states_vec3) :: cm !< Conservative momentum variables - integer :: i, j, k, l, q !< Generic loop iterators + type(riemann_states_vec3) :: b4 !< 4-magnetic field components (spatial: b4x, b4y, b4z) + type(riemann_states_vec3) :: cm !< Conservative momentum variables + integer :: i, j, k, l !< Generic loop iterators ! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, & @@ -111,9 +111,9 @@ contains #:set SV = STENCIL_VAR #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) if (norm_dir == ${NORM_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, q, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, & - & alpha_R, tau_e_L, tau_e_R, Re_L, Re_R, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, & - & Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, & + $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, & + & tau_e_L, tau_e_R, Re_L, Re_R, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, & + & Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, & & pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp, rho_L, rho_R, & & pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, T_L, T_R, & & Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, gamma_L, & @@ -197,37 +197,12 @@ contains alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) - end do + call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_L, rho_L, gamma_L, pi_inf_L, qv_L) + call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_R, rho_R, gamma_R, pi_inf_R, qv_R) if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i) - end do - - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do + call s_compute_interface_reynolds(alpha_L, Re_L) + call s_compute_interface_reynolds(alpha_R, Re_R) end if if (chemistry) then diff --git a/src/simulation/m_riemann_solver_hllc.fpp b/src/simulation/m_riemann_solver_hllc.fpp index f8ed19379e..5418d5a58a 100644 --- a/src/simulation/m_riemann_solver_hllc.fpp +++ b/src/simulation/m_riemann_solver_hllc.fpp @@ -47,10 +47,13 @@ contains #:if not MFC_CASE_OPTIMIZATION and USING_AMD real(wp), dimension(3) :: alpha_rho_L, alpha_rho_R real(wp), dimension(3) :: alpha_L, alpha_R + real(wp), dimension(3) :: alpha_lim_L, alpha_lim_R real(wp), dimension(3) :: vel_L, vel_R #:else real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R real(wp), dimension(num_fluids) :: alpha_L, alpha_R + !> Post-limiter volume fractions (alpha_L/R retain the pre-limiter loads used downstream) + real(wp), dimension(num_fluids) :: alpha_lim_L, alpha_lim_R real(wp), dimension(num_dims) :: vel_L, vel_R #:endif @@ -117,8 +120,8 @@ contains real(wp) :: rho_Star, E_Star, p_Star, p_K_Star, vel_K_star real(wp) :: pres_SL, pres_SR, Ms_L, Ms_R real(wp) :: flux_ene_e - real(wp) :: zcoef, pcorr !< low Mach number correction - integer :: Re_max, i, j, k, l, q !< Generic loop iterators + real(wp) :: zcoef, pcorr !< low Mach number correction + integer :: i, j, k, l, q !< Generic loop iterators ! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, & @@ -138,16 +141,16 @@ contains ! 6-EQUATION MODEL WITH HLLC HLLC star-state flux with contact wave speed s_S if (model_eqns == model_eqns_6eq) then ! 6-equation model (model_eqns=3): separate phasic internal energies - $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, q, vel_L, vel_R, Re_L, Re_R, alpha_L, alpha_R, Ys_L, & - & Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, Yi_avg, Phi_avg, h_iL, h_iR, & - & h_avg_2, tau_e_L, tau_e_R, flux_ene_e, xi_field_L, xi_field_R, pcorr, zcoef, rho_L, & - & rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, & - & T_L, T_R, Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, & - & Gamm_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, G_L, G_R, & - & rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, & - & vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, & - & alpha_R_sum, rho_Star, E_Star, p_Star, p_K_Star, vel_K_star, s_L, s_R, s_M, s_P, s_S, & - & xi_M, xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, vel_L, vel_R, Re_L, Re_R, alpha_L, alpha_R, & + & alpha_rho_L, alpha_rho_R, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, & + & Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, tau_e_L, tau_e_R, flux_ene_e, xi_field_L, & + & xi_field_R, pcorr, zcoef, rho_L, rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, & + & Cv_avg, T_avg, eps, c_sum_Yi_Phi, T_L, T_R, Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, & + & Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, & + & qv_R, qv_avg, c_L, c_R, G_L, G_R, rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, & + & vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, & + & alpha_L_sum, alpha_R_sum, rho_Star, E_Star, p_Star, p_K_Star, vel_K_star, s_L, s_R, & + & s_M, s_P, s_S, xi_M, xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP]') do l = ${Z_BND}$%beg, ${Z_BND}$%end do k = ${Y_BND}$%beg, ${Y_BND}$%end do j = ${X_BND}$%beg, ${X_BND}$%end @@ -210,36 +213,20 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + qL_prim_rsx_vf(${SF('')}$, i) - gamma_L = gamma_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rsx_vf(${SF('')}$, i)*qvs(i) - - rho_R = rho_R + qR_prim_rsx_vf(${SF(' + 1')}$, i) - gamma_R = gamma_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)*qvs(i) - + alpha_rho_L(i) = qL_prim_rsx_vf(${SF('')}$, i) + alpha_rho_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, i) alpha_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%adv%beg + i - 1) alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%adv%beg + i - 1) end do + call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_L, rho_L, gamma_L, pi_inf_L, & + & qv_L) + call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_R, rho_R, gamma_R, pi_inf_R, & + & qv_R) + if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + Re_idx(i, q))/Res_gs(i, q) + Re_L(i) - Re_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + Re_idx(i, q))/Res_gs(i, & - & q) + Re_R(i) - end do - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do + call s_compute_interface_reynolds(alpha_L, Re_L) + call s_compute_interface_reynolds(alpha_R, Re_R) end if E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L @@ -596,18 +583,10 @@ contains alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) - end do + call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_L, rho_L, gamma_L, pi_inf_L, & + & qv_L) + call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_R, rho_R, gamma_R, pi_inf_R, & + & qv_R) pres_L = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E) pres_R = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E) @@ -740,12 +719,10 @@ contains end do ! Recalculating the radial momentum geometric source flux flux_gsrc_rsx_vf(${SF('')}$, & - & eqn_idx%cont%end + dir_idx(1)) = xi_M*(rho_L*(vel_L(dir_idx(1)) & - & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + & eqn_idx%cont%end + dir_idx(1)) & + & = f_compute_hllc_star_momentum_flux(rho_L, rho_R, vel_L(dir_idx(1)), & + & vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, xi_R, xi_M, xi_P, & + & dir_flg(dir_idx(1))) ! Geometrical source of the void fraction(s) is zero $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end @@ -760,12 +737,9 @@ contains flux_gsrc_rsx_vf(${SF('')}$, i) = 0._wp end do flux_gsrc_rsx_vf(${SF('')}$, & - & eqn_idx%mom%beg + 1) = -xi_M*(rho_L*(vel_L(dir_idx(1))*vel_L(dir_idx(1) & - & ) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & - xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + & eqn_idx%mom%beg + 1) = -f_compute_hllc_star_momentum_flux(rho_L, & + & rho_R, vel_L(dir_idx(1)), vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, & + & xi_R, xi_M, xi_P, dir_flg(dir_idx(1))) flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%mom%end) = flux_rsx_vf(${SF('')}$, eqn_idx%mom%beg + 1) end if #:endif @@ -776,13 +750,13 @@ contains else if (model_eqns == model_eqns_5eq .and. bubbles_euler) then ! 5-equation model with Euler-Euler bubble dynamics $:GPU_PARALLEL_LOOP(collapse=3, private='[i, q, R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, & - & vel_R, rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, Re_L, Re_R, pcorr, zcoef, rho_L, & - & rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, & - & qv_R, qv_avg, c_L, c_R, c_avg, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, vel_R_tmp, & - & Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, s_L, s_R, s_M, s_P, s_S, xi_M, & - & xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP, nbub_L, nbub_R, PbwR3Lbar, PbwR3Rbar, & - & R3Lbar, R3Rbar, R3V2Lbar, R3V2Rbar, Ys_L, Ys_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, & - & Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2]') + & vel_R, rho_avg, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, h_avg, gamma_avg, Re_L, & + & Re_R, pcorr, zcoef, rho_L, rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, gamma_L, gamma_R, & + & pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, c_avg, vel_L_rms, vel_R_rms, & + & vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, & + & alpha_R_sum, s_L, s_R, s_M, s_P, s_S, xi_M, xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, & + & xi_PP, nbub_L, nbub_R, PbwR3Lbar, PbwR3Rbar, R3Lbar, R3Rbar, R3V2Lbar, R3V2Rbar, Ys_L, & + & Ys_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2]') do l = ${Z_BND}$%beg, ${Z_BND}$%end do k = ${Y_BND}$%beg, ${Y_BND}$%end do j = ${X_BND}$%beg, ${X_BND}$%end @@ -794,6 +768,8 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids + alpha_rho_L(i) = qL_prim_rsx_vf(${SF('')}$, i) + alpha_rho_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, i) alpha_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) end do @@ -810,29 +786,15 @@ contains ! Retain this in the refactor if (mpp_lim .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + qL_prim_rsx_vf(${SF('')}$, i) - gamma_L = gamma_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rsx_vf(${SF('')}$, i)*qvs(i) - rho_R = rho_R + qR_prim_rsx_vf(${SF(' + 1')}$, i) - gamma_R = gamma_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)*qvs(i) - end do + call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_L, rho_L, gamma_L, & + & pi_inf_L, qv_L) + call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_R, rho_R, gamma_R, & + & pi_inf_R, qv_R) else if (num_fluids > 2) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_L = rho_L + qL_prim_rsx_vf(${SF('')}$, i) - gamma_L = gamma_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rsx_vf(${SF('')}$, i)*qvs(i) - rho_R = rho_R + qR_prim_rsx_vf(${SF(' + 1')}$, i) - gamma_R = gamma_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)*qvs(i) - end do + call s_accumulate_mixture_properties(num_fluids - 1, alpha_rho_L, alpha_L, rho_L, gamma_L, & + & pi_inf_L, qv_L) + call s_accumulate_mixture_properties(num_fluids - 1, alpha_rho_R, alpha_R, rho_R, gamma_R, & + & pi_inf_R, qv_R) else rho_L = qL_prim_rsx_vf(${SF('')}$, 1) gamma_L = gammas(1) @@ -1135,12 +1097,10 @@ contains end do ! Recalculating the radial momentum geometric source flux flux_gsrc_rsx_vf(${SF('')}$, & - & eqn_idx%cont%end + dir_idx(1)) = xi_M*(rho_L*(vel_L(dir_idx(1)) & - & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + & eqn_idx%cont%end + dir_idx(1)) & + & = f_compute_hllc_star_momentum_flux(rho_L, rho_R, vel_L(dir_idx(1)), & + & vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, xi_R, xi_M, xi_P, & + & dir_flg(dir_idx(1))) ! Geometrical source of the void fraction(s) is zero $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end @@ -1156,12 +1116,9 @@ contains end do flux_gsrc_rsx_vf(${SF('')}$, & - & eqn_idx%mom%beg + 1) = -xi_M*(rho_L*(vel_L(dir_idx(1))*vel_L(dir_idx(1) & - & ) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & - xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + & eqn_idx%mom%beg + 1) = -f_compute_hllc_star_momentum_flux(rho_L, & + & rho_R, vel_L(dir_idx(1)), vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, & + & xi_R, xi_M, xi_P, dir_flg(dir_idx(1))) flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%mom%end) = flux_rsx_vf(${SF('')}$, eqn_idx%mom%beg + 1) end if #:endif @@ -1171,15 +1128,15 @@ contains $:END_GPU_PARALLEL_LOOP() else ! 5-equation model (model_eqns=2): mixture total energy, volume fraction advection - $:GPU_PARALLEL_LOOP(collapse=3, private='[Re_max, i, q, T_L, T_R, vel_L_rms, vel_R_rms, pres_L, pres_R, & - & rho_L, gamma_L, pi_inf_L, qv_L, rho_R, gamma_R, pi_inf_R, qv_R, alpha_L_sum, & - & alpha_R_sum, E_L, E_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, & - & Gamm_R, Y_L, Y_R, H_L, H_R, qv_avg, rho_avg, gamma_avg, H_avg, c_L, c_R, c_avg, s_P, & - & s_M, xi_P, xi_M, xi_L, xi_R, xi_L_m1, xi_R_m1, Ms_L, Ms_R, pres_SL, pres_SR, vel_L, & - & vel_R, Re_L, Re_R, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, & - & vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, & - & tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, G_L, & - & G_R]', copyin='[is1, is2, is3]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i, T_L, T_R, vel_L_rms, vel_R_rms, pres_L, pres_R, rho_L, gamma_L, & + & pi_inf_L, qv_L, rho_R, gamma_R, pi_inf_R, qv_R, alpha_L_sum, alpha_R_sum, E_L, E_R, & + & MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, Y_L, Y_R, H_L, & + & H_R, qv_avg, rho_avg, gamma_avg, H_avg, c_L, c_R, c_avg, s_P, s_M, xi_P, xi_M, xi_L, & + & xi_R, xi_L_m1, xi_R_m1, Ms_L, Ms_R, pres_SL, pres_SR, vel_L, vel_R, Re_L, Re_R, & + & alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, alpha_lim_L, alpha_lim_R, s_L, s_R, s_S, & + & vel_avg_rms, pcorr, zcoef, vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, & + & Gamma_iR, Cp_iL, Cp_iR, tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg, Phi_avg, & + & h_iL, h_iR, h_avg_2, G_L, G_R]', copyin='[is1, is2, is3]') do l = ${Z_BND}$%beg, ${Z_BND}$%end do k = ${Y_BND}$%beg, ${Y_BND}$%end do j = ${X_BND}$%beg, ${X_BND}$%end @@ -1230,38 +1187,24 @@ contains end do end if + ! Post-limiter loads for the mixture properties; alpha_L/R keep the pre-limiter loads used + ! downstream $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + qL_prim_rsx_vf(${SF('')}$, i) - gamma_L = gamma_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rsx_vf(${SF('')}$, i)*qvs(i) - - rho_R = rho_R + qR_prim_rsx_vf(${SF(' + 1')}$, i) - gamma_R = gamma_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)*qvs(i) + alpha_rho_L(i) = qL_prim_rsx_vf(${SF('')}$, i) + alpha_rho_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, i) + alpha_lim_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) + alpha_lim_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) end do - Re_max = 0 - if (Re_size(1) > 0) Re_max = 1 - if (Re_size(2) > 0) Re_max = 2 + call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_lim_L, rho_L, gamma_L, & + & pi_inf_L, qv_L) + call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_lim_R, rho_R, gamma_R, & + & pi_inf_R, qv_R) if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, Re_max - Re_L(i) = 0._wp - Re_R(i) = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i) - end do - - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do + call s_compute_interface_reynolds(alpha_L, Re_L) + call s_compute_interface_reynolds(alpha_R, Re_R) end if if (chemistry) then @@ -1588,12 +1531,10 @@ contains end do ! Recalculating the radial momentum geometric source flux flux_gsrc_rsx_vf(${SF('')}$, & - & eqn_idx%cont%end + dir_idx(1)) = xi_M*(rho_L*(vel_L(dir_idx(1)) & - & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + & eqn_idx%cont%end + dir_idx(1)) & + & = f_compute_hllc_star_momentum_flux(rho_L, rho_R, vel_L(dir_idx(1)), & + & vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, xi_R, xi_M, xi_P, & + & dir_flg(dir_idx(1))) ! Geometrical source of the void fraction(s) is zero $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end @@ -1609,12 +1550,9 @@ contains end do flux_gsrc_rsx_vf(${SF('')}$, & - & eqn_idx%mom%beg + 1) = -xi_M*(rho_L*(vel_L(dir_idx(1))*vel_L(dir_idx(1) & - & ) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & - xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + & eqn_idx%mom%beg + 1) = -f_compute_hllc_star_momentum_flux(rho_L, & + & rho_R, vel_L(dir_idx(1)), vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, & + & xi_R, xi_M, xi_P, dir_flg(dir_idx(1))) flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%mom%end) = flux_rsx_vf(${SF('')}$, eqn_idx%mom%beg + 1) end if #:endif diff --git a/src/simulation/m_riemann_solver_lf.fpp b/src/simulation/m_riemann_solver_lf.fpp index c3c3aaa959..e829edc125 100644 --- a/src/simulation/m_riemann_solver_lf.fpp +++ b/src/simulation/m_riemann_solver_lf.fpp @@ -95,7 +95,7 @@ contains type(riemann_states) :: vdotB, B2 type(riemann_states_vec3) :: b4 !< 4-magnetic field components (spatial: b4x, b4y, b4z) type(riemann_states_vec3) :: cm !< Conservative momentum variables - integer :: i, j, k, l, q !< Generic loop iterators + integer :: i, j, k, l !< Generic loop iterators integer, dimension(3) :: idx_right_phys !< Physical (j,k,l) indices for right state. ! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions @@ -111,9 +111,9 @@ contains #:set SV = STENCIL_VAR #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) if (norm_dir == ${NORM_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, q, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, & - & alpha_R, tau_e_L, tau_e_R, G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, & - & Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, & + $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, & + & tau_e_L, tau_e_R, G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, & + & Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, & & Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, & & vel_grad_L, vel_grad_R, idx_right_phys, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, & & vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, c_avg, pres_L, pres_R, & @@ -197,37 +197,12 @@ contains alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) - end do + call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_L, rho_L, gamma_L, pi_inf_L, qv_L) + call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_R, rho_R, gamma_R, pi_inf_R, qv_R) if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i) - end do - - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do + call s_compute_interface_reynolds(alpha_L, Re_L) + call s_compute_interface_reynolds(alpha_R, Re_R) end if if (chemistry) then @@ -645,23 +620,8 @@ contains end do end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i) - end do - - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do + call s_compute_interface_reynolds(alpha_L, Re_L) + call s_compute_interface_reynolds(alpha_R, Re_R) if (shear_stress) then $:GPU_LOOP(parallelism='[seq]') diff --git a/src/simulation/m_riemann_state.fpp b/src/simulation/m_riemann_state.fpp index 595f785d39..bde913675d 100644 --- a/src/simulation/m_riemann_state.fpp +++ b/src/simulation/m_riemann_state.fpp @@ -1016,6 +1016,83 @@ contains end subroutine s_calculate_bulk_stress_tensor + !> Accumulate the mixture density, specific heat ratio function, liquid stiffness function, and internal energy reference of one + !! Riemann state from its partial densities and volume fractions. The number of fluids is an explicit argument because the + !! 5-equation bubble model accumulates over num_fluids - 1 fluids. + subroutine s_accumulate_mixture_properties(nf, alpha_rho_K, alpha_K, rho_K, gamma_K, pi_inf_K, qv_K) + + $:GPU_ROUTINE(function_name='s_accumulate_mixture_properties', parallelism='[seq]', cray_inline=True) + + integer, intent(in) :: nf !< Number of fluids to accumulate over + real(wp), dimension(nf), intent(in) :: alpha_rho_K, alpha_K + real(wp), intent(out) :: rho_K, gamma_K, pi_inf_K, qv_K + integer :: i !< Loop iterator over fluids + + rho_K = 0._wp + gamma_K = 0._wp + pi_inf_K = 0._wp + qv_K = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nf + rho_K = rho_K + alpha_rho_K(i) + gamma_K = gamma_K + alpha_K(i)*gammas(i) + pi_inf_K = pi_inf_K + alpha_K(i)*pi_infs(i) + qv_K = qv_K + alpha_rho_K(i)*qvs(i) + end do + + end subroutine s_accumulate_mixture_properties + + !> Compute the shear and volume Reynolds numbers of one Riemann state by inverse-weighting the fluid Reynolds numbers with the + !! volume fractions. + subroutine s_compute_interface_reynolds(alpha_K, Re_K) + + $:GPU_ROUTINE(function_name='s_compute_interface_reynolds', parallelism='[seq]', cray_inline=True) + + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: alpha_K + #:else + real(wp), dimension(num_fluids), intent(in) :: alpha_K + #:endif + real(wp), dimension(2), intent(out) :: Re_K + integer :: i, q !< Loop iterators + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_K(i) = dflt_real + + if (Re_size(i) > 0) Re_K(i) = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_K(i) = alpha_K(Re_idx(i, q))/Res_gs(i, q) + Re_K(i) + end do + + Re_K(i) = 1._wp/max(Re_K(i), sgm_eps) + end do + + end subroutine s_compute_interface_reynolds + + !> Compute the advective part of the HLLC star-state momentum flux in the wave-normal direction (pressure excluded), used to + !! assemble the geometrical source flux of the cylindrical and azimuthal sweeps. + function f_compute_hllc_star_momentum_flux(rho_L, rho_R, vel_L_norm, vel_R_norm, s_M, s_P, s_S, xi_L, xi_R, xi_M, xi_P, & + & dir_flg_norm) result(flux_mom) + + $:GPU_ROUTINE(function_name='f_compute_hllc_star_momentum_flux', parallelism='[seq]', cray_inline=True) + + real(wp), intent(in) :: rho_L, rho_R !< Left and right densities + real(wp), intent(in) :: vel_L_norm, vel_R_norm !< Left and right wave-normal velocities + real(wp), intent(in) :: s_M, s_P, s_S !< Clamped left/right and contact wave speeds + real(wp), intent(in) :: xi_L, xi_R, xi_M, xi_P !< Star-state compression factors and upwind selectors + real(wp), intent(in) :: dir_flg_norm !< Direction flag of the wave-normal direction + real(wp) :: flux_mom + + flux_mom = xi_M*(rho_L*(vel_L_norm*vel_L_norm + s_M*(xi_L*(dir_flg_norm*s_S + (1._wp - dir_flg_norm)*vel_L_norm) & + & - vel_L_norm))) + xi_P*(rho_R*(vel_R_norm*vel_R_norm + s_P*(xi_R*(dir_flg_norm*s_S + (1._wp & + & - dir_flg_norm)*vel_R_norm) - vel_R_norm))) + + end function f_compute_hllc_star_momentum_flux + !> Deallocation and/or disassociation procedures that are needed to finalize the selected Riemann problem solver subroutine s_finalize_riemann_solver(flux_vf, flux_src_vf, flux_gsrc_vf, norm_dir) From b26302b54567d73993eaf5e124a885d40d84e1b8 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 11 Jun 2026 16:56:49 -0400 Subject: [PATCH 3/7] src: unify hypoelastic interface energy into a shared helper; delete dead solver copies Implements Task 2 of the Phase-3 Riemann decomposition plan per Spencer's rulings (2026-06-11) on the BLOCKED-ON-PHYSICS adjudication: 1. Energy gate: the helper gates on G > verysmall, not HLL's hard-coded 1000 floor; HLL's TODO asking for its removal is honored and deleted. This is a deliberate behavior change for soft-G (G in (verysmall, 1000]) hypoelastic runs; golden tests are unaffected (test shear moduli are 5e4/1e5). 2. Damage scaling: the (1 - damage) factor applies whenever cont_damage is on (HLL's existing behavior); the helper carries it. 3. Dead copies: the hypoelastic energy-loading blocks in HLLC (both instances) and LF are deleted, since case_validator.py:577 restricts hypoelasticity to the HLL solver. LF's G_L/G_R locals became unused and are removed from the declarations and the private list; HLLC's tau_e/G locals remain in use by its hyperelastic and elastic flux/wave-speed code. 4. Wave-speed corrections are untouched in all solvers; the hyper-tau_e divergence is filed upstream separately. The helper uses the shear_indices-based energy doubling (HLL's dimension-correct form, not the hard-coded i==2/4/5). The caller loads the SF-indexed stress and damage values and passes them in, keeping tau_e_L/R available for the stress-flux and wave-speed blocks; the mixture moduli G_L/G_R are intent(out) for the elastic wave speeds. --- src/simulation/m_riemann_solver_hll.fpp | 37 +++++---------- src/simulation/m_riemann_solver_hllc.fpp | 57 ------------------------ src/simulation/m_riemann_solver_lf.fpp | 48 ++++---------------- src/simulation/m_riemann_state.fpp | 48 +++++++++++++++++++- 4 files changed, 67 insertions(+), 123 deletions(-) diff --git a/src/simulation/m_riemann_solver_hll.fpp b/src/simulation/m_riemann_solver_hll.fpp index 7752f8c813..d46a770fc1 100644 --- a/src/simulation/m_riemann_solver_hll.fpp +++ b/src/simulation/m_riemann_solver_hll.fpp @@ -75,6 +75,7 @@ contains real(wp) :: c_L, c_R real(wp), dimension(6) :: tau_e_L, tau_e_R real(wp) :: G_L, G_R + real(wp) :: damage_L, damage_R real(wp), dimension(2) :: Re_L, Re_R real(wp), dimension(3) :: xi_field_L, xi_field_R real(wp) :: rho_avg @@ -117,9 +118,9 @@ contains & pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp, rho_L, rho_R, & & pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, T_L, T_R, & & Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, gamma_L, & - & gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, G_L, G_R, rho_avg, H_avg, c_avg, & - & gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, vel_avg_rms, Ms_L, Ms_R, pres_SL, & - & pres_SR, alpha_L_sum, alpha_R_sum, flux_tau_L, flux_tau_R]', copyin='[norm_dir]') + & gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, G_L, G_R, damage_L, damage_R, & + & rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, vel_avg_rms, & + & Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, flux_tau_L, flux_tau_R]', copyin='[norm_dir]') do l = ${Z_BND}$%beg, ${Z_BND}$%end do k = ${Y_BND}$%beg, ${Y_BND}$%end do j = ${X_BND}$%beg, ${X_BND}$%end @@ -298,34 +299,20 @@ contains ! elastic energy update if (hypoelasticity) then - G_L = 0._wp; G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) + do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 + tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i) + tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i) end do + damage_L = 0._wp; damage_R = 0._wp if (cont_damage) then - G_L = G_L*max((1._wp - qL_prim_rsx_vf(${SF('')}$, eqn_idx%damage)), 0._wp) - G_R = G_R*max((1._wp - qR_prim_rsx_vf(${SF('')}$, eqn_idx%damage)), 0._wp) + damage_L = qL_prim_rsx_vf(${SF('')}$, eqn_idx%damage) + damage_R = qR_prim_rsx_vf(${SF('')}$, eqn_idx%damage) end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i) - tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i) - ! Elastic contribution to energy if G large enough TODO take out if statement if stable without - if ((G_L > 1000) .and. (G_R > 1000)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Double for shear stresses - if (any(eqn_idx%stress%beg - 1 + i == shear_indices)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if - end if - end do + call s_compute_hypoelastic_interface_energy(num_fluids, alpha_L, alpha_R, damage_L, damage_R, & + & tau_e_L, tau_e_R, G_L, G_R, E_L, E_R) end if @:compute_average_state() diff --git a/src/simulation/m_riemann_solver_hllc.fpp b/src/simulation/m_riemann_solver_hllc.fpp index 5418d5a58a..55dea991ea 100644 --- a/src/simulation/m_riemann_solver_hllc.fpp +++ b/src/simulation/m_riemann_solver_hllc.fpp @@ -232,34 +232,6 @@ contains E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R - ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i) - tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i) - end do - G_L = 0._wp; G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - ! Elastic contribution to energy if G large enough - if ((G_L > verysmall) .and. (G_R > verysmall)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Additional terms in 2D and 3D - if ((i == 2) .or. (i == 4) .or. (i == 5)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if - end if - end do - end if - ! Hyperelastic stress contribution: strain energy added to total energy if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') @@ -1263,35 +1235,6 @@ contains H_R = (E_R + pres_R)/rho_R end if - ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i) - tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i) - end do - G_L = 0._wp - G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - ! Elastic contribution to energy if G large enough - if ((G_L > verysmall) .and. (G_R > verysmall)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Additional terms in 2D and 3D - if ((i == 2) .or. (i == 4) .or. (i == 5)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if - end if - end do - end if - ! Hyperelastic stress contribution: strain energy added to total energy if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') diff --git a/src/simulation/m_riemann_solver_lf.fpp b/src/simulation/m_riemann_solver_lf.fpp index e829edc125..ce2d6cd237 100644 --- a/src/simulation/m_riemann_solver_lf.fpp +++ b/src/simulation/m_riemann_solver_lf.fpp @@ -74,7 +74,6 @@ contains real(wp) :: qv_L, qv_R real(wp) :: c_L, c_R real(wp), dimension(6) :: tau_e_L, tau_e_R - real(wp) :: G_L, G_R real(wp), dimension(2) :: Re_L, Re_R real(wp), dimension(3) :: xi_field_L, xi_field_R real(wp) :: rho_avg @@ -112,14 +111,14 @@ contains #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) if (norm_dir == ${NORM_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, & - & tau_e_L, tau_e_R, G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, & - & Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, & - & Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, & - & vel_grad_L, vel_grad_R, idx_right_phys, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, & - & vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, c_avg, pres_L, pres_R, & - & rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, c_R, E_L, E_R, H_L, & - & H_R, ptilde_L, ptilde_R, s_M, s_P, xi_M, xi_P, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, & - & Cp_L, Cp_R, Cv_L, Cv_R, R_gas_L, R_gas_R, MW_L, MW_R, T_L, T_R, Y_L, Y_R]') + & tau_e_L, tau_e_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, & + & xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, & + & h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_grad_L, & + & vel_grad_R, idx_right_phys, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, & + & Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, c_avg, pres_L, pres_R, rho_L, rho_R, & + & gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, c_R, E_L, E_R, H_L, H_R, ptilde_L, & + & ptilde_R, s_M, s_P, xi_M, xi_P, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, Cp_L, Cp_R, Cv_L, & + & Cv_R, R_gas_L, R_gas_R, MW_L, MW_R, T_L, T_R, Y_L, Y_R]') do l = ${Z_BND}$%beg, ${Z_BND}$%end do k = ${Y_BND}$%beg, ${Y_BND}$%end do j = ${X_BND}$%beg, ${X_BND}$%end @@ -294,37 +293,6 @@ contains H_R = (E_R + pres_R)/rho_R end if - ! elastic energy update - if (hypoelasticity) then - G_L = 0._wp; G_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - - if (cont_damage) then - G_L = G_L*max((1._wp - qL_prim_rsx_vf(${SF('')}$, eqn_idx%damage)), 0._wp) - G_R = G_R*max((1._wp - qR_prim_rsx_vf(${SF('')}$, eqn_idx%damage)), 0._wp) - end if - - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i) - tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i) - ! Elastic contribution to energy if G large enough TODO take out if statement if stable without - if ((G_L > 1000) .and. (G_R > 1000)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Double for shear stresses - if (any(eqn_idx%stress%beg - 1 + i == shear_indices)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if - end if - end do - end if - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, vel_L_rms, 0._wp, c_L, & & qv_L) diff --git a/src/simulation/m_riemann_state.fpp b/src/simulation/m_riemann_state.fpp index bde913675d..38e15ce995 100644 --- a/src/simulation/m_riemann_state.fpp +++ b/src/simulation/m_riemann_state.fpp @@ -11,7 +11,7 @@ module m_riemann_state use m_derived_types use m_global_parameters - use m_constants, only: riemann_solver_hll, riemann_solver_hlld + use m_constants, only: riemann_solver_hll, riemann_solver_hlld, verysmall use m_hb_function implicit none @@ -1073,6 +1073,52 @@ contains end subroutine s_compute_interface_reynolds + !> Accumulate the hypoelastic stress contribution to the energies of the left and right Riemann states: mix the shear modulus + !! over the fluids, scale it by the continuum damage state when damage is modeled, and add the elastic energy of each stress + !! component (doubled for the shear components) when both mixture moduli are non-negligible. The elastic shear stresses are + !! loaded from the state buffers by the caller, which reuses them for the stress fluxes and elastic wave speeds. + subroutine s_compute_hypoelastic_interface_energy(nf, alpha_L, alpha_R, damage_L, damage_R, tau_e_L, tau_e_R, G_L, G_R, E_L, & + & E_R) + + $:GPU_ROUTINE(function_name='s_compute_hypoelastic_interface_energy', parallelism='[seq]', cray_inline=True) + + integer, intent(in) :: nf !< Number of fluids to mix the shear modulus over + real(wp), dimension(nf), intent(in) :: alpha_L, alpha_R !< Left and right volume fractions + real(wp), intent(in) :: damage_L, damage_R !< Continuum damage states (referenced only when cont_damage) + real(wp), dimension(6), intent(in) :: tau_e_L, tau_e_R !< Left and right elastic shear stresses + real(wp), intent(out) :: G_L, G_R !< Left and right mixture shear moduli + real(wp), intent(inout) :: E_L, E_R !< Left and right state energies + integer :: i !< Loop iterator + + G_L = 0._wp; G_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nf + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do + + if (cont_damage) then + G_L = G_L*max((1._wp - damage_L), 0._wp) + G_R = G_R*max((1._wp - damage_R), 0._wp) + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 + ! Elastic contribution to energy if G large enough + if ((G_L > verysmall) .and. (G_R > verysmall)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Double for shear stresses + if (any(eqn_idx%stress%beg - 1 + i == shear_indices)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + end if + end if + end do + + end subroutine s_compute_hypoelastic_interface_energy + !> Compute the advective part of the HLLC star-state momentum flux in the wave-normal direction (pressure excluded), used to !! assemble the geometrical source flux of the cylindrical and azimuthal sweeps. function f_compute_hllc_star_momentum_flux(rho_L, rho_R, vel_L_norm, vel_R_norm, s_M, s_P, s_S, xi_L, xi_R, xi_M, xi_P, & From f1ffcbead7a63735138a58aa23de21643ca35e2d Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 11 Jun 2026 17:43:50 -0400 Subject: [PATCH 4/7] docs: document the GPU_ROUTINE helper pattern; record Phase-3 completion --- docs/documentation/gpuParallelization.md | 57 ++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md index 489ad42b12..4830b480f0 100644 --- a/docs/documentation/gpuParallelization.md +++ b/docs/documentation/gpuParallelization.md @@ -607,6 +607,63 @@ Does not do anything for OpenMP currently ------------------------------------------------------------------------------------------ +## Writing GPU_ROUTINE Device Helpers + +`GPU_ROUTINE` marks a subroutine or function as callable from within a GPU kernel +(an OpenACC `routine` or OpenMP `declare target` directive). The standard idiom for +pure, sequential per-thread helpers is: + +```fortran +subroutine s_accumulate_mixture_properties(nf, alpha_rho_K, alpha_K, rho_K, gamma_K, pi_inf_K, qv_K) + + $:GPU_ROUTINE(function_name='s_accumulate_mixture_properties', parallelism='[seq]', cray_inline=True) + + integer, intent(in) :: nf + real(wp), dimension(nf), intent(in) :: alpha_rho_K, alpha_K + real(wp), intent(out) :: rho_K, gamma_K, pi_inf_K, qv_K + ... +end subroutine s_accumulate_mixture_properties +``` + +**When to use it.** Extract a block into a `GPU_ROUTINE` helper when: + +- It does roughly 50–100 or more FLOPs of work per call, enough to amortize the + call overhead on device. Below that threshold, the helper is justified only when + the same block appears verbatim in three or more callers (duplication removal). +- It is called from inside a `GPU_PARALLEL_LOOP` region — ``parallelism='[seq]'`` is + correct for sequential per-thread work; the surrounding loop keeps the parallelism. + +**Key idioms.** + +- Always ``parallelism='[seq]'`` and `cray_inline=True` for these helpers. The Cray + compiler does not inline cross-file `routine` calls without `cray_inline`; without + it, performance collapses silently on that backend. +- `function_name=` is required when `cray_inline=True` (the Cray inline directive + needs the explicit name). +- The `GPU_ROUTINE` directive lives in the *definition*, not the call site. The + caller needs no annotation beyond being inside a `GPU_PARALLEL_LOOP`. + +**Caller-loads, helper-computes.** Callers do all coordinate-indexed array loads +from the global state arrays (`qL_rs_vf`, `qR_rs_vf`, etc.) before the call; the +helper receives only scalars or small arrays with explicit-shape dimensioning. This +is required because the `SF` indexing lambda used in solver loops is defined locally +inside each solver's `#:for NORM_DIR` block and cannot be referenced from a helper. + +**AMD case-opt compatibility.** Under `--case-optimization` with the AMD backend, +arrays that are sized by runtime parameters at compile time must be declared with an +explicit constant bound. Use an explicit `n` argument (e.g., `integer, intent(in) :: +nf`) and dimension helpers as `dimension(nf)` rather than `dimension(num_fluids)`. +See `s_compute_interface_reynolds` in `src/simulation/m_riemann_state.fpp` for the +`#:if not MFC_CASE_OPTIMIZATION and USING_AMD` guard pattern where the caller-side +dimensioning differs. + +**Declare scoping.** The `GPU_ROUTINE` directive must appear in the source file +that defines the routine. Helpers added to `m_riemann_state.fpp` are automatically +in scope for every solver module that `use`s it — no additional declare-target +annotations are needed at call sites. + +------------------------------------------------------------------------------------------ + # Debugging Tools and Tips for GPUs ## Compiler agnostic tools From a577f15e05557d17f4e26649bead4ed0ab22a5fd Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Thu, 11 Jun 2026 19:53:22 -0400 Subject: [PATCH 5/7] build: fix per-target loop and vestigial lines in MFCTargets Flagged by Copilot review on #1572; all pre-existing on master, carried verbatim by the CMakeLists split. Unified-memory flags (NVHPC managedalloc pair, FRONTIER_UNIFIED x2) now apply to a_target so the NVHPC two-pass IPO lib target gets them too; the set(ENV{OMP_TARGET_OFFLOAD} [MANDATORY]) line is deleted outright - set(ENV) only affects the configure-time process so it never influenced built binaries, and its bracketed value was a literal besides; a leftover debug message(STATUS) removed. --- cmake/MFCTargets.cmake | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/cmake/MFCTargets.cmake b/cmake/MFCTargets.cmake index 5242fe9fd8..3e848aca4f 100644 --- a/cmake/MFCTargets.cmake +++ b/cmake/MFCTargets.cmake @@ -74,7 +74,6 @@ exit 0 foreach (a_target ${IPO_TARGETS}) set_target_properties(${a_target} PROPERTIES Fortran_PREPROCESS ON) - message(STATUS ${CMAKE_Fortran_COMPILER_ID}) target_include_directories(${a_target} PRIVATE "${CMAKE_SOURCE_DIR}/src/common" @@ -161,7 +160,6 @@ exit 0 if (NOT TARGET OpenMP::OpenMP_Fortran) message(FATAL_ERROR "OpenMP + Fortran is unsupported.") endif() - set(ENV{OMP_TARGET_OFFLOAD} [MANDATORY]) # target_link_libraries(${a_target} PRIVATE OpenMP::OpenMP_Fortran) target_compile_definitions(${a_target} PRIVATE MFC_OpenMP MFC_GPU) @@ -209,11 +207,11 @@ exit 0 # GH-200 Unified Memory Support if (MFC_Unified) - target_compile_options(${ARGS_TARGET} + target_compile_options(${a_target} PRIVATE -gpu=mem:unified:managedalloc -cuda ) # "This option must appear in both the compile and link lines" -- NVHPC Docs - target_link_options(${ARGS_TARGET} + target_link_options(${a_target} PRIVATE -gpu=mem:unified:managedalloc -cuda ) endif() @@ -226,7 +224,7 @@ exit 0 elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") # Frontier Unified Memory Support if (MFC_Unified) - target_compile_options(${ARGS_TARGET} + target_compile_options(${a_target} PRIVATE -DFRONTIER_UNIFIED) endif() @@ -239,7 +237,7 @@ exit 0 elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") if (MFC_Unified) - target_compile_options(${ARGS_TARGET} + target_compile_options(${a_target} PRIVATE -DFRONTIER_UNIFIED) endif() From fc7f770c719f6668b09b97a6e6a8bfcaf47cc28b Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Fri, 12 Jun 2026 00:07:53 -0400 Subject: [PATCH 6/7] src: delete the remaining validator-dead hypoelastic blocks in LF and HLLC; doc fixes Guard inventory (every tau_e/G consumer in the two files was classified by its exact guard before deletion): - LF: hypoelastic momentum flux (else if hypoelasticity), hypoelastic energy flux (else if hypoelasticity), stress evolution flux (if hypoelasticity), cylindrical tau_sigmasigma source (if cyl_coord .and. hypoelasticity) - all deleted. - HLLC: stress evolution flux (if hypoelasticity), both 6-eqn and 5-eqn copies - deleted. The hyperelasticity-gated tau_e/G/xi_field loads and the elasticity-gated wave speeds and elastic momentum/energy fluxes are untouched (live under hyperelasticity, which is legal with all solvers). - HLL is untouched. case_validator.py prohibits hypoelasticity with any solver but HLL, so these blocks were unreachable; after PR #1572 deleted the hypoelastic energy blocks they consumed tau_e_L/R that nothing in these files loads. Per the maintainer ruling: prefer deleting code - lifting the restriction later means adding the code back deliberately. Unused locals removed from LF: tau_e_L/R and flux_tau_L/R (zero refs after deletion) and xi_field_L/R (orphaned since the solver split, never referenced in LF) - declarations and GPU private-list entries. HLLC declarations are unchanged (tau_e/G remain live on the hyper/elasticity paths). GPU census: LF -7 acc seq-loop directives (2 deleted seq loops x 3 sweep copies + 1 cyl-only), HLLC -6 (1 seq loop x 3 copies x 2 blocks), OMP path unchanged (seq loops emit nothing under OMP); all deltas trace to deleted blocks. Doc fixes: rescope the cray_inline guidance to cross-module helpers only, describe the AMD case-opt guard as sitting on the helper's dummy declaration with matching caller-local guards, and note in s_compute_hypoelastic_interface_energy that the verysmall gate is the deliberate ruling that replaced HLL's former G > 1000 stability floor and retired its TODO. --- docs/documentation/gpuParallelization.md | 13 +++-- src/simulation/m_riemann_solver_hllc.fpp | 22 --------- src/simulation/m_riemann_solver_lf.fpp | 60 +++--------------------- src/simulation/m_riemann_state.fpp | 4 +- 4 files changed, 18 insertions(+), 81 deletions(-) diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md index 4830b480f0..3023ac7880 100644 --- a/docs/documentation/gpuParallelization.md +++ b/docs/documentation/gpuParallelization.md @@ -635,9 +635,11 @@ end subroutine s_accumulate_mixture_properties **Key idioms.** -- Always ``parallelism='[seq]'`` and `cray_inline=True` for these helpers. The Cray - compiler does not inline cross-file `routine` calls without `cray_inline`; without - it, performance collapses silently on that backend. +- Always ``parallelism='[seq]'`` for these helpers. `cray_inline=True` is required + when the helper is called from other modules — the Cray compiler does not inline + cross-file `routine` calls without it, and performance collapses silently on that + backend. Helpers called only from their own module (e.g., the shear/bulk stress + tensor pair in `m_riemann_state.fpp`) do not need it. - `function_name=` is required when `cray_inline=True` (the Cray inline directive needs the explicit name). - The `GPU_ROUTINE` directive lives in the *definition*, not the call site. The @@ -654,8 +656,9 @@ arrays that are sized by runtime parameters at compile time must be declared wit explicit constant bound. Use an explicit `n` argument (e.g., `integer, intent(in) :: nf`) and dimension helpers as `dimension(nf)` rather than `dimension(num_fluids)`. See `s_compute_interface_reynolds` in `src/simulation/m_riemann_state.fpp` for the -`#:if not MFC_CASE_OPTIMIZATION and USING_AMD` guard pattern where the caller-side -dimensioning differs. +`#:if not MFC_CASE_OPTIMIZATION and USING_AMD` guard pattern: the guard sits on the +dummy-argument declaration in the helper's definition, with matching guards on the +callers' own local declarations so the actual and dummy bounds agree. **Declare scoping.** The `GPU_ROUTINE` directive must appear in the source file that defines the routine. Helpers added to `m_riemann_state.fpp` are automatically diff --git a/src/simulation/m_riemann_solver_hllc.fpp b/src/simulation/m_riemann_solver_hllc.fpp index 55dea991ea..da59cbf1a9 100644 --- a/src/simulation/m_riemann_solver_hllc.fpp +++ b/src/simulation/m_riemann_solver_hllc.fpp @@ -442,17 +442,6 @@ contains flux_src_rsx_vf(${SF('')}$, eqn_idx%adv%beg) = vel_src_rsx_vf(${SF('')}$, dir_idx(1)) - ! HYPOELASTIC STRESS EVOLUTION FLUX. - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - flux_rsx_vf(${SF('')}$, & - & eqn_idx%stress%beg - 1 + i) = xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) & - & - rho_L*vel_L(dir_idx(1))*tau_e_L(i)) + xi_P*(s_S/(s_R - s_S)) & - & *(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(dir_idx(1))*tau_e_R(i)) - end do - end if - ! Hyperelastic reference map flux for material deformation tracking if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') @@ -1404,17 +1393,6 @@ contains flux_rsx_vf(${SF('')}$, eqn_idx%E) = flux_rsx_vf(${SF('')}$, eqn_idx%E) + flux_ene_e end if - ! HYPOELASTIC STRESS EVOLUTION FLUX. - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - flux_rsx_vf(${SF('')}$, & - & eqn_idx%stress%beg - 1 + i) = xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) & - & - rho_L*vel_L(dir_idx(1))*tau_e_L(i)) + xi_P*(s_S/(s_R - s_S)) & - & *(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(dir_idx(1))*tau_e_R(i)) - end do - end if - ! VOLUME FRACTION FLUX. $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end diff --git a/src/simulation/m_riemann_solver_lf.fpp b/src/simulation/m_riemann_solver_lf.fpp index ce2d6cd237..b86367b9d3 100644 --- a/src/simulation/m_riemann_solver_lf.fpp +++ b/src/simulation/m_riemann_solver_lf.fpp @@ -35,7 +35,6 @@ contains ! Intercell fluxes type(scalar_field), dimension(sys_size), intent(inout) :: flux_vf, flux_src_vf, flux_gsrc_vf - real(wp) :: flux_tau_L, flux_tau_R integer, intent(in) :: norm_dir type(int_bounds_info), intent(in) :: ix, iy, iz @@ -73,9 +72,7 @@ contains real(wp) :: pi_inf_L, pi_inf_R real(wp) :: qv_L, qv_R real(wp) :: c_L, c_R - real(wp), dimension(6) :: tau_e_L, tau_e_R real(wp), dimension(2) :: Re_L, Re_R - real(wp), dimension(3) :: xi_field_L, xi_field_R real(wp) :: rho_avg real(wp) :: H_avg real(wp) :: gamma_avg @@ -111,14 +108,13 @@ contains #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) if (norm_dir == ${NORM_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, & - & tau_e_L, tau_e_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, & - & xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, & - & h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_grad_L, & - & vel_grad_R, idx_right_phys, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, & - & Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, c_avg, pres_L, pres_R, rho_L, rho_R, & - & gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, c_R, E_L, E_R, H_L, H_R, ptilde_L, & - & ptilde_R, s_M, s_P, xi_M, xi_P, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, Cp_L, Cp_R, Cv_L, & - & Cv_R, R_gas_L, R_gas_R, MW_L, MW_R, T_L, T_R, Y_L, Y_R]') + & Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, Cp_iL, Cp_iR, Xs_L, Xs_R, & + & Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, & + & B2, b4, cm, pcorr, zcoef, vel_grad_L, vel_grad_R, idx_right_phys, vel_L_rms, vel_R_rms, & + & vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, & + & c_avg, pres_L, pres_R, rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, & + & c_R, E_L, E_R, H_L, H_R, ptilde_L, ptilde_R, s_M, s_P, xi_M, xi_P, Cp_avg, Cv_avg, T_avg, & + & eps, c_sum_Yi_Phi, Cp_L, Cp_R, Cv_L, Cv_R, R_gas_L, R_gas_R, MW_L, MW_R, T_L, T_R, Y_L, Y_R]') do l = ${Z_BND}$%beg, ${Z_BND}$%end do k = ${Y_BND}$%beg, ${Y_BND}$%end do j = ${X_BND}$%beg, ${X_BND}$%end @@ -379,16 +375,6 @@ contains & + s_M*s_P*(rho_L*vel_L(dir_idx(i)) - rho_R*vel_R(dir_idx(i))))/(s_M - s_P) & & + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) end do - else if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rsx_vf(${SF('')}$, & - & eqn_idx%cont%end + dir_idx(i)) = (s_M*(rho_R*vel_R(dir_idx(1))*vel_R(dir_idx(i)) & - & + dir_flg(dir_idx(i))*pres_R - tau_e_R(dir_idx_tau(i))) & - & - s_P*(rho_L*vel_L(dir_idx(1))*vel_L(dir_idx(i)) + dir_flg(dir_idx(i))*pres_L & - & - tau_e_L(dir_idx_tau(i))) + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - & - rho_R*vel_R(dir_idx(i))))/(s_M - s_P) - end do else $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels @@ -422,17 +408,6 @@ contains & eqn_idx%E) = (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) - s_P*vel_L(dir_idx(1) & & )*(E_L + pres_L - ptilde_L) + s_M*s_P*(E_L - E_R))/(s_M - s_P) + (s_M/s_L)*(s_P/s_R) & & *pcorr*(vel_R_rms - vel_L_rms)/2._wp - else if (hypoelasticity) then - flux_tau_L = 0._wp; flux_tau_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) - flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) - end do - flux_rsx_vf(${SF('')}$, & - & eqn_idx%E) = (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & - & - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) + s_M*s_P*(E_L - E_R))/(s_M & - & - s_P) else flux_rsx_vf(${SF('')}$, & & eqn_idx%E) = (s_M*vel_R(dir_idx(1))*(E_R + pres_R) - s_P*vel_L(dir_idx(1))*(E_L & @@ -440,16 +415,6 @@ contains & - vel_L_rms)/2._wp end if - ! Elastic Stresses - if (hypoelasticity) then - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 ! TODO: this indexing may be slow - flux_rsx_vf(${SF('')}$, & - & eqn_idx%stress%beg - 1 + i) = (s_M*(rho_R*vel_R(dir_idx(1))*tau_e_R(i)) & - & - s_P*(rho_L*vel_L(dir_idx(1))*tau_e_L(i)) + s_M*s_P*(rho_L*tau_e_L(i) & - & - rho_R*tau_e_R(i)))/(s_M - s_P) - end do - end if - ! Advection flux and source: interface velocity for volume fraction transport $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end @@ -521,17 +486,6 @@ contains flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i) end do end if - - if (cyl_coord .and. hypoelasticity) then - ! += tau_sigmasigma using HLL - flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%cont%end + 2) = flux_gsrc_rsx_vf(${SF('')}$, & - & eqn_idx%cont%end + 2) + (s_M*tau_e_R(4) - s_P*tau_e_L(4))/(s_M - s_P) - - $:GPU_LOOP(parallelism='[seq]') - do i = eqn_idx%stress%beg, eqn_idx%stress%end - flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i) - end do - end if #:endif end do end do diff --git a/src/simulation/m_riemann_state.fpp b/src/simulation/m_riemann_state.fpp index 38e15ce995..9efd3972a2 100644 --- a/src/simulation/m_riemann_state.fpp +++ b/src/simulation/m_riemann_state.fpp @@ -1076,7 +1076,9 @@ contains !> Accumulate the hypoelastic stress contribution to the energies of the left and right Riemann states: mix the shear modulus !! over the fluids, scale it by the continuum damage state when damage is modeled, and add the elastic energy of each stress !! component (doubled for the shear components) when both mixture moduli are non-negligible. The elastic shear stresses are - !! loaded from the state buffers by the caller, which reuses them for the stress fluxes and elastic wave speeds. + !! loaded from the state buffers by the caller, which reuses them for the stress fluxes and elastic wave speeds. The G > + !! verysmall gate is a deliberate maintainer ruling that replaces HLL's former hard-coded G > 1000 stability floor, retiring its + !! "TODO take out if statement if stable without". subroutine s_compute_hypoelastic_interface_energy(nf, alpha_L, alpha_R, damage_L, damage_R, tau_e_L, tau_e_R, G_L, G_R, E_L, & & E_R) From a6d9d9d1b42a541ac033026f48e828b6718d2a9f Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Tue, 16 Jun 2026 09:00:34 -0400 Subject: [PATCH 7/7] HLL: add s_M/s_P/xi_M/xi_P to private list (fix CCE-19 defaultmap spill on Cray GPU-OMP) --- src/simulation/m_riemann_solver_hll.fpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/simulation/m_riemann_solver_hll.fpp b/src/simulation/m_riemann_solver_hll.fpp index f46aa94e1c..8cc4ade407 100644 --- a/src/simulation/m_riemann_solver_hll.fpp +++ b/src/simulation/m_riemann_solver_hll.fpp @@ -115,15 +115,15 @@ contains #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) if (norm_dir == ${NORM_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, & - & tau_e_L, tau_e_R, Re_L, Re_R, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, & - & Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, & - & pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp, rho_L, rho_R, & - & pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, T_L, T_R, & - & Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, gamma_L, & - & gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, G_L, G_R, damage_L, damage_R, & - & rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, vel_avg_rms, & - & Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, flux_tau_L, flux_tau_R]', & - & copyin='[norm_dir]', firstprivate='[Re_size_loc1, Re_size_loc2]') + & tau_e_L, tau_e_R, Re_L, Re_R, s_L, s_R, s_M, s_P, s_S, xi_M, xi_P, Ys_L, Ys_R, xi_field_L, & + & xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, & + & h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp, & + & rho_L, rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, & + & T_L, T_R, Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, & + & gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, G_L, G_R, damage_L, & + & damage_R, rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, & + & vel_avg_rms, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, flux_tau_L, & + & flux_tau_R]', copyin='[norm_dir]', firstprivate='[Re_size_loc1, Re_size_loc2]') do l = ${Z_BND}$%beg, ${Z_BND}$%end do k = ${Y_BND}$%beg, ${Y_BND}$%end do j = ${X_BND}$%beg, ${X_BND}$%end