From 839fead2644cedafb8e7af823e154a77ac63609c Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Thu, 11 Jun 2026 15:27:45 -0400
Subject: [PATCH 1/7] bench: add per-solver Riemann benchmark variants

---
 benchmarks/5eq_rk3_weno3_hll/case.py | 279 +++++++++++++++++++++++++++
 benchmarks/5eq_rk3_weno3_lf/case.py  | 279 +++++++++++++++++++++++++++
 toolchain/bench.yaml                 |  20 ++
 3 files changed, 578 insertions(+)
 create mode 100644 benchmarks/5eq_rk3_weno3_hll/case.py
 create mode 100644 benchmarks/5eq_rk3_weno3_lf/case.py

diff --git a/benchmarks/5eq_rk3_weno3_hll/case.py b/benchmarks/5eq_rk3_weno3_hll/case.py
new file mode 100644
index 0000000000..1c7558edb7
--- /dev/null
+++ b/benchmarks/5eq_rk3_weno3_hll/case.py
@@ -0,0 +1,279 @@
+#!/usr/bin/env python3
+# Benchmark model_equations_2_time_stepper_3_weno_order_3_riemann_solver_hll
+# Additional Benchmarked Features
+# - model_equations : 2
+# - time_stepper : 3
+# - weno_order : 3
+# - riemann_solver : hll
+
+import argparse
+import json
+import math
+
+parser = argparse.ArgumentParser(prog="Benchmarking Case 1", description="This MFC case was created for the purposes of benchmarking MFC.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+parser.add_argument("--mfc", type=json.loads, default="{}", metavar="DICT", help="MFC's toolchain's internal state.")
+parser.add_argument("--gbpp", type=int, metavar="MEM", default=16, help="Adjusts the problem size per rank to fit into [MEM] GB of GPU memory per GPU.")
+parser.add_argument("--steps", type=int, default=None, help="Override t_step_stop/t_step_save.")
+
+ARGS = vars(parser.parse_args())
+DICT = ARGS["mfc"]
+
+size = 1 if DICT["gpu"] else 0
+
+ppg = 8000000 / 16.0
+procs = DICT["nodes"] * DICT["tasks_per_node"]
+ncells = math.floor(ppg * procs * ARGS["gbpp"])
+s = math.floor((ncells / 2.0) ** (1 / 3))
+Nx, Ny, Nz = 2 * s, s, s
+
+# athmospheric pressure - Pa (used as reference value)
+patm = 101325
+
+# Initial Droplet Diameter / Reference length - m
+D0 = 1.0e-3
+
+# cavity to droplet ratio
+CtD = 0.06
+
+# cavity relative eccentricity (distance between radii)
+ecc = 0.564
+
+# initial shock distance from the y axis. Note that the droplet center is located at y = 0. Thus, the distance from the shock to
+# the droplet is about D0/8
+ISD = 5.0 / 8 * D0
+
+# pre-shock properties - AIR
+
+# pressure - Pa
+p0a = patm
+
+# density - kg/m3
+rho0a = 1.204
+
+# gamma
+gama = 1.40
+
+# pi infinity - Pa
+pia = 0
+
+# speed of sound - M/s
+c_a = math.sqrt(gama * (p0a + pia) / rho0a)
+
+# Droplet - WATER
+
+# surface tension - N / m
+st = 0.00e0
+
+# Delta Pressure - Pa
+DP = -st * 4 / D0
+
+# initial pressure inside the droplet - Pa
+p0w = p0a - DP
+
+# density - kg/m3
+rho0w = 1000
+
+# gama
+gamw = 6.12
+
+# pi infty - Pa
+piw = 3.43e08
+
+# speed of sound - m/s
+c_w = math.sqrt(gamw * (p0w + piw) / rho0w)
+
+# Shock Mach number of interest. Note that the post-shock properties can be defined in terms of either
+# Min or psOp0a. Just comment/uncomment appropriately
+Min = 2.4
+
+# Pos to pre shock ratios - AIR
+
+# pressure
+psOp0a = (Min**2 - 1) * 2 * gama / (gama + 1) + 1
+# psOp0a = 4.5
+
+# density
+rhosOrho0a = (1 + (gama + 1) / (gama - 1) * psOp0a) / ((gama + 1) / (gama - 1) + psOp0a)
+
+# Mach number of the shocked region - just a checker, as it must return "Min"
+Ms = math.sqrt((gama + 1.0) / (2.0 * gama) * (psOp0a - 1.0) * (p0a / (p0a + pia)) + 1.0)
+
+# shock speed of sound - m/s
+ss = Ms * c_a
+
+# post-shock - AIR
+
+# pressure - Pa
+ps = psOp0a * p0a
+
+# density - kg / m3
+rhos = rhosOrho0a * rho0a
+
+# post shock speed of sound - m/s
+c_s = math.sqrt(gama * (ps + pia) / rhos)
+
+# velocity at the post shock - m/s
+vel = c_a / gama * (psOp0a - 1.0) * p0a / (p0a + pia) / Ms
+
+# Domain boundaries - m
+
+# x direction
+xb = -8.4707 * D0
+xe = 9.6226 * D0
+
+# xb = -10 * D0
+# xe = 10 * D0
+
+# y direction
+yb = 0 * D0
+ye = 10 * D0
+
+# y direction
+zb = 0 * D0
+ze = 10 * D0
+
+# Stretching factor, to make sure the domaing is sufficiently large after the mesh stretch
+StF = 4.0
+
+# grid delta x if mesh were uniform in x direction - m. Note that I do not need a measure for dy
+dx = (xe - xb) / Nx
+
+# I calculate tend twice; first is an estimate, second is
+# the actual value used. This is because I am getting errors in the
+# post process part every time I approximate the actual Nt by an integer
+# number (think of a smarter way).
+
+# dimensionless time
+ttilde = 1.92
+
+# auxiliary simulation physical time - s. This is not YET the total simulation time, as it will be corrected so as to avoid
+# mismatches in simulation and post_process parts. Note that I wrote it this way so I have better control over the # of autosaves
+tendA = ttilde * D0 / vel
+
+cfl = 0.1
+
+# time-step - s
+dt = dx * cfl / ss
+
+# Save Frequency. Note that the number of autosaves will be SF + 1, as th IC (0.dat) is also saved
+SF = 400
+
+# making Nt divisible by SF
+# 1 - ensure NtA goes slightly beyond tendA
+NtA = int(tendA // dt + 1)
+
+# Array of saves. It is the same as Nt/Sf = t_step_save
+AS = int(NtA // SF + 1)
+
+# Nt = total number of steps. Note that Nt >= NtA (so at least tendA is completely simulated)
+Nt = AS * SF
+
+# total simulation time - s. Note that tend >= tendA
+tend = Nt * dt
+
+# Configuring case dictionary
+print(
+    json.dumps(
+        {
+            # Logistics
+            "run_time_info": "F",
+            # Computational Domain Parameters
+            "x_domain%beg": xb,
+            "x_domain%end": xe,
+            "y_domain%beg": yb,
+            "y_domain%end": ye,
+            "z_domain%beg": zb,
+            "z_domain%end": ze,
+            "m": Nx,
+            "n": Ny,
+            "p": Nz,
+            "cyl_coord": "F",
+            "dt": dt,
+            "t_step_start": 0,
+            "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)),
+            "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)),
+            # Simulation Algorithm Parameters
+            "num_patches": 3,
+            "model_eqns": 2,
+            "alt_soundspeed": "F",
+            "num_fluids": 2,
+            "mpp_lim": "T",
+            "mixture_err": "T",
+            "time_stepper": 3,
+            "weno_order": 3,
+            "weno_eps": 1.0e-16,
+            "weno_Re_flux": "F",
+            "weno_avg": "F",
+            "mapped_weno": "T",
+            "riemann_solver": "hll",
+            "wave_speeds": 1,
+            "avg_state": 2,
+            "bc_x%beg": -6,
+            "bc_x%end": -6,
+            "bc_y%beg": -2,
+            "bc_y%end": -3,
+            "bc_z%beg": -2,
+            "bc_z%end": -3,
+            # Formatted Database Files Structure Parameters
+            "format": 1,
+            "precision": 2,
+            "prim_vars_wrt": "T",
+            "parallel_io": "F",
+            # I will use 1 for WATER properties, and 2 for AIR properties
+            # Patch 1: Background (AIR - 2)
+            "patch_icpp(1)%geometry": 9,
+            "patch_icpp(1)%x_centroid": (xb + xe) / 2 * StF,
+            "patch_icpp(1)%y_centroid": (yb + ye) / 2 * StF,
+            "patch_icpp(1)%z_centroid": (yb + ye) / 2 * StF,
+            "patch_icpp(1)%length_x": (xe - xb) * StF,
+            "patch_icpp(1)%length_y": (ye - yb) * StF,
+            "patch_icpp(1)%length_z": (ze - zb) * StF,
+            "patch_icpp(1)%vel(1)": 0.0e00,
+            "patch_icpp(1)%vel(2)": 0.0e00,
+            "patch_icpp(1)%vel(3)": 0.0e00,
+            "patch_icpp(1)%pres": p0a,
+            "patch_icpp(1)%alpha_rho(1)": 0.0e00,
+            "patch_icpp(1)%alpha_rho(2)": rho0a,
+            "patch_icpp(1)%alpha(1)": 0.0e00,
+            "patch_icpp(1)%alpha(2)": 1.0e00,
+            # Patch 2: Shocked state (AIR - 2)
+            "patch_icpp(2)%geometry": 9,
+            "patch_icpp(2)%alter_patch(1)": "T",
+            "patch_icpp(2)%x_centroid": -ISD - (xe - xb) / 2 * StF,
+            "patch_icpp(2)%y_centroid": (yb + ye) / 2 * StF,
+            "patch_icpp(2)%z_centroid": (zb + ze) / 2 * StF,
+            "patch_icpp(2)%length_x": (xe - xb) * StF,
+            "patch_icpp(2)%length_y": (ye - yb) * StF,
+            "patch_icpp(2)%length_z": (ze - zb) * StF,
+            "patch_icpp(2)%vel(1)": vel,
+            "patch_icpp(2)%vel(2)": 0.0e00,
+            "patch_icpp(2)%vel(3)": 0.0e00,
+            "patch_icpp(2)%pres": ps,
+            "patch_icpp(2)%alpha_rho(1)": 0.0e00,
+            "patch_icpp(2)%alpha_rho(2)": rhos,
+            "patch_icpp(2)%alpha(1)": 0.0e00,
+            "patch_icpp(2)%alpha(2)": 1.0e00,
+            # Patch 3: Droplet (WATER - 1)
+            "patch_icpp(3)%geometry": 8,
+            "patch_icpp(3)%x_centroid": 0.0e00,
+            "patch_icpp(3)%y_centroid": 0.0e00,
+            "patch_icpp(3)%z_centroid": 0.0e00,
+            "patch_icpp(3)%radius": D0 / 2,
+            "patch_icpp(3)%alter_patch(1)": "T",
+            "patch_icpp(3)%vel(1)": 0.0e00,
+            "patch_icpp(3)%vel(2)": 0.0e00,
+            "patch_icpp(3)%vel(3)": 0.0e00,
+            "patch_icpp(3)%pres": p0w,
+            "patch_icpp(3)%alpha_rho(1)": rho0w,
+            "patch_icpp(3)%alpha_rho(2)": 0.0e00,
+            "patch_icpp(3)%alpha(1)": 1.0e00,
+            "patch_icpp(3)%alpha(2)": 0.0e00,
+            # Fluids Physical Parameters
+            "fluid_pp(1)%gamma": 1.0e00 / (gamw - 1),
+            "fluid_pp(1)%pi_inf": gamw * piw / (gamw - 1),
+            "fluid_pp(2)%gamma": 1.0e00 / (gama - 1),
+            "fluid_pp(2)%pi_inf": gama * pia / (gama - 1),
+        }
+    )
+)
diff --git a/benchmarks/5eq_rk3_weno3_lf/case.py b/benchmarks/5eq_rk3_weno3_lf/case.py
new file mode 100644
index 0000000000..55662ea40d
--- /dev/null
+++ b/benchmarks/5eq_rk3_weno3_lf/case.py
@@ -0,0 +1,279 @@
+#!/usr/bin/env python3
+# Benchmark model_equations_2_time_stepper_3_weno_order_3_riemann_solver_lax_friedrichs
+# Additional Benchmarked Features
+# - model_equations : 2
+# - time_stepper : 3
+# - weno_order : 3
+# - riemann_solver : lax_friedrichs
+
+import argparse
+import json
+import math
+
+parser = argparse.ArgumentParser(prog="Benchmarking Case 1", description="This MFC case was created for the purposes of benchmarking MFC.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+parser.add_argument("--mfc", type=json.loads, default="{}", metavar="DICT", help="MFC's toolchain's internal state.")
+parser.add_argument("--gbpp", type=int, metavar="MEM", default=16, help="Adjusts the problem size per rank to fit into [MEM] GB of GPU memory per GPU.")
+parser.add_argument("--steps", type=int, default=None, help="Override t_step_stop/t_step_save.")
+
+ARGS = vars(parser.parse_args())
+DICT = ARGS["mfc"]
+
+size = 1 if DICT["gpu"] else 0
+
+ppg = 8000000 / 16.0
+procs = DICT["nodes"] * DICT["tasks_per_node"]
+ncells = math.floor(ppg * procs * ARGS["gbpp"])
+s = math.floor((ncells / 2.0) ** (1 / 3))
+Nx, Ny, Nz = 2 * s, s, s
+
+# athmospheric pressure - Pa (used as reference value)
+patm = 101325
+
+# Initial Droplet Diameter / Reference length - m
+D0 = 1.0e-3
+
+# cavity to droplet ratio
+CtD = 0.06
+
+# cavity relative eccentricity (distance between radii)
+ecc = 0.564
+
+# initial shock distance from the y axis. Note that the droplet center is located at y = 0. Thus, the distance from the shock to
+# the droplet is about D0/8
+ISD = 5.0 / 8 * D0
+
+# pre-shock properties - AIR
+
+# pressure - Pa
+p0a = patm
+
+# density - kg/m3
+rho0a = 1.204
+
+# gamma
+gama = 1.40
+
+# pi infinity - Pa
+pia = 0
+
+# speed of sound - M/s
+c_a = math.sqrt(gama * (p0a + pia) / rho0a)
+
+# Droplet - WATER
+
+# surface tension - N / m
+st = 0.00e0
+
+# Delta Pressure - Pa
+DP = -st * 4 / D0
+
+# initial pressure inside the droplet - Pa
+p0w = p0a - DP
+
+# density - kg/m3
+rho0w = 1000
+
+# gama
+gamw = 6.12
+
+# pi infty - Pa
+piw = 3.43e08
+
+# speed of sound - m/s
+c_w = math.sqrt(gamw * (p0w + piw) / rho0w)
+
+# Shock Mach number of interest. Note that the post-shock properties can be defined in terms of either
+# Min or psOp0a. Just comment/uncomment appropriately
+Min = 2.4
+
+# Pos to pre shock ratios - AIR
+
+# pressure
+psOp0a = (Min**2 - 1) * 2 * gama / (gama + 1) + 1
+# psOp0a = 4.5
+
+# density
+rhosOrho0a = (1 + (gama + 1) / (gama - 1) * psOp0a) / ((gama + 1) / (gama - 1) + psOp0a)
+
+# Mach number of the shocked region - just a checker, as it must return "Min"
+Ms = math.sqrt((gama + 1.0) / (2.0 * gama) * (psOp0a - 1.0) * (p0a / (p0a + pia)) + 1.0)
+
+# shock speed of sound - m/s
+ss = Ms * c_a
+
+# post-shock - AIR
+
+# pressure - Pa
+ps = psOp0a * p0a
+
+# density - kg / m3
+rhos = rhosOrho0a * rho0a
+
+# post shock speed of sound - m/s
+c_s = math.sqrt(gama * (ps + pia) / rhos)
+
+# velocity at the post shock - m/s
+vel = c_a / gama * (psOp0a - 1.0) * p0a / (p0a + pia) / Ms
+
+# Domain boundaries - m
+
+# x direction
+xb = -8.4707 * D0
+xe = 9.6226 * D0
+
+# xb = -10 * D0
+# xe = 10 * D0
+
+# y direction
+yb = 0 * D0
+ye = 10 * D0
+
+# y direction
+zb = 0 * D0
+ze = 10 * D0
+
+# Stretching factor, to make sure the domaing is sufficiently large after the mesh stretch
+StF = 4.0
+
+# grid delta x if mesh were uniform in x direction - m. Note that I do not need a measure for dy
+dx = (xe - xb) / Nx
+
+# I calculate tend twice; first is an estimate, second is
+# the actual value used. This is because I am getting errors in the
+# post process part every time I approximate the actual Nt by an integer
+# number (think of a smarter way).
+
+# dimensionless time
+ttilde = 1.92
+
+# auxiliary simulation physical time - s. This is not YET the total simulation time, as it will be corrected so as to avoid
+# mismatches in simulation and post_process parts. Note that I wrote it this way so I have better control over the # of autosaves
+tendA = ttilde * D0 / vel
+
+cfl = 0.1
+
+# time-step - s
+dt = dx * cfl / ss
+
+# Save Frequency. Note that the number of autosaves will be SF + 1, as th IC (0.dat) is also saved
+SF = 400
+
+# making Nt divisible by SF
+# 1 - ensure NtA goes slightly beyond tendA
+NtA = int(tendA // dt + 1)
+
+# Array of saves. It is the same as Nt/Sf = t_step_save
+AS = int(NtA // SF + 1)
+
+# Nt = total number of steps. Note that Nt >= NtA (so at least tendA is completely simulated)
+Nt = AS * SF
+
+# total simulation time - s. Note that tend >= tendA
+tend = Nt * dt
+
+# Configuring case dictionary
+print(
+    json.dumps(
+        {
+            # Logistics
+            "run_time_info": "F",
+            # Computational Domain Parameters
+            "x_domain%beg": xb,
+            "x_domain%end": xe,
+            "y_domain%beg": yb,
+            "y_domain%end": ye,
+            "z_domain%beg": zb,
+            "z_domain%end": ze,
+            "m": Nx,
+            "n": Ny,
+            "p": Nz,
+            "cyl_coord": "F",
+            "dt": dt,
+            "t_step_start": 0,
+            "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)),
+            "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)),
+            # Simulation Algorithm Parameters
+            "num_patches": 3,
+            "model_eqns": 2,
+            "alt_soundspeed": "F",
+            "num_fluids": 2,
+            "mpp_lim": "T",
+            "mixture_err": "T",
+            "time_stepper": 3,
+            "weno_order": 3,
+            "weno_eps": 1.0e-16,
+            "weno_Re_flux": "F",
+            "weno_avg": "F",
+            "mapped_weno": "T",
+            "riemann_solver": "lax_friedrichs",
+            "wave_speeds": 1,
+            "avg_state": 2,
+            "bc_x%beg": -6,
+            "bc_x%end": -6,
+            "bc_y%beg": -2,
+            "bc_y%end": -3,
+            "bc_z%beg": -2,
+            "bc_z%end": -3,
+            # Formatted Database Files Structure Parameters
+            "format": 1,
+            "precision": 2,
+            "prim_vars_wrt": "T",
+            "parallel_io": "F",
+            # I will use 1 for WATER properties, and 2 for AIR properties
+            # Patch 1: Background (AIR - 2)
+            "patch_icpp(1)%geometry": 9,
+            "patch_icpp(1)%x_centroid": (xb + xe) / 2 * StF,
+            "patch_icpp(1)%y_centroid": (yb + ye) / 2 * StF,
+            "patch_icpp(1)%z_centroid": (yb + ye) / 2 * StF,
+            "patch_icpp(1)%length_x": (xe - xb) * StF,
+            "patch_icpp(1)%length_y": (ye - yb) * StF,
+            "patch_icpp(1)%length_z": (ze - zb) * StF,
+            "patch_icpp(1)%vel(1)": 0.0e00,
+            "patch_icpp(1)%vel(2)": 0.0e00,
+            "patch_icpp(1)%vel(3)": 0.0e00,
+            "patch_icpp(1)%pres": p0a,
+            "patch_icpp(1)%alpha_rho(1)": 0.0e00,
+            "patch_icpp(1)%alpha_rho(2)": rho0a,
+            "patch_icpp(1)%alpha(1)": 0.0e00,
+            "patch_icpp(1)%alpha(2)": 1.0e00,
+            # Patch 2: Shocked state (AIR - 2)
+            "patch_icpp(2)%geometry": 9,
+            "patch_icpp(2)%alter_patch(1)": "T",
+            "patch_icpp(2)%x_centroid": -ISD - (xe - xb) / 2 * StF,
+            "patch_icpp(2)%y_centroid": (yb + ye) / 2 * StF,
+            "patch_icpp(2)%z_centroid": (zb + ze) / 2 * StF,
+            "patch_icpp(2)%length_x": (xe - xb) * StF,
+            "patch_icpp(2)%length_y": (ye - yb) * StF,
+            "patch_icpp(2)%length_z": (ze - zb) * StF,
+            "patch_icpp(2)%vel(1)": vel,
+            "patch_icpp(2)%vel(2)": 0.0e00,
+            "patch_icpp(2)%vel(3)": 0.0e00,
+            "patch_icpp(2)%pres": ps,
+            "patch_icpp(2)%alpha_rho(1)": 0.0e00,
+            "patch_icpp(2)%alpha_rho(2)": rhos,
+            "patch_icpp(2)%alpha(1)": 0.0e00,
+            "patch_icpp(2)%alpha(2)": 1.0e00,
+            # Patch 3: Droplet (WATER - 1)
+            "patch_icpp(3)%geometry": 8,
+            "patch_icpp(3)%x_centroid": 0.0e00,
+            "patch_icpp(3)%y_centroid": 0.0e00,
+            "patch_icpp(3)%z_centroid": 0.0e00,
+            "patch_icpp(3)%radius": D0 / 2,
+            "patch_icpp(3)%alter_patch(1)": "T",
+            "patch_icpp(3)%vel(1)": 0.0e00,
+            "patch_icpp(3)%vel(2)": 0.0e00,
+            "patch_icpp(3)%vel(3)": 0.0e00,
+            "patch_icpp(3)%pres": p0w,
+            "patch_icpp(3)%alpha_rho(1)": rho0w,
+            "patch_icpp(3)%alpha_rho(2)": 0.0e00,
+            "patch_icpp(3)%alpha(1)": 1.0e00,
+            "patch_icpp(3)%alpha(2)": 0.0e00,
+            # Fluids Physical Parameters
+            "fluid_pp(1)%gamma": 1.0e00 / (gamw - 1),
+            "fluid_pp(1)%pi_inf": gamw * piw / (gamw - 1),
+            "fluid_pp(2)%gamma": 1.0e00 / (gama - 1),
+            "fluid_pp(2)%pi_inf": gama * pia / (gama - 1),
+        }
+    )
+)
diff --git a/toolchain/bench.yaml b/toolchain/bench.yaml
index d09d9863e3..617d9138f3 100644
--- a/toolchain/bench.yaml
+++ b/toolchain/bench.yaml
@@ -8,6 +8,26 @@
   path: benchmarks/5eq_rk3_weno3_hllc/case.py
   args: []
 
+# Benchmark model_equations_2_time_stepper_3_weno_order_3_riemann_solver_hll
+# Additional Benchmarked Features
+# - model_equations : 2
+# - time_stepper : 3
+# - weno_order : 3
+# - riemann_solver : hll
+- slug: 5eq_rk3_weno3_hll
+  path: benchmarks/5eq_rk3_weno3_hll/case.py
+  args: []
+
+# Benchmark model_equations_2_time_stepper_3_weno_order_3_riemann_solver_lax_friedrichs
+# Additional Benchmarked Features
+# - model_equations : 2
+# - time_stepper : 3
+# - weno_order : 3
+# - riemann_solver : lax_friedrichs
+- slug: 5eq_rk3_weno3_lf
+  path: benchmarks/5eq_rk3_weno3_lf/case.py
+  args: []
+
 # Benchmark viscosity_weno_Re_flux_T_weno_order_5_bubbles_T_bubble_mode_3_acoustic_T
 # Additional Benchmarked Features
 # - viscosity enabled

From 6837302513791caf5370506750f51cda3823d75c Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Thu, 11 Jun 2026 15:57:25 -0400
Subject: [PATCH 2/7] src: extract shared Riemann interface helpers (mixture,
 Reynolds, cylindrical source)

Adds three GPU_ROUTINE(cray_inline) helpers to m_riemann_state and replaces the duplicated inline blocks in the HLL, HLLC, and LF solvers (HLLD untouched): s_accumulate_mixture_properties (14 call sites: hll 2, lf 2, hllc 10 across the 6-eq/4-eq/5-eq-bubbles/5-eq branches), s_compute_interface_reynolds (10 call sites: hll 2, lf 4, hllc 4), and f_compute_hllc_star_momentum_flux (6 call sites: hllc cylindrical and azimuthal geometric source flux).

NOT NFC: calls replace inline arithmetic, so floating-point ordering may shift within golden tolerance. The hllc 5-eq Re_max Reynolds variant is replaced by the shared form: live lanes are arithmetically identical; previously-uninitialized dead lanes (Re_size(i)==0) now get the defined 1/sgm_eps value. The hllc 5-eq branch loads post-limiter alpha into new alpha_lim arrays so alpha_L/R keep their pre-limiter values used downstream. The HLL/LF cylindrical source blocks and the hllc 5-eq-bubbles (1-alpha) Reynolds variant use different arithmetic and stay inline.
---
 src/simulation/m_riemann_solver_hll.fpp  |  49 ++---
 src/simulation/m_riemann_solver_hllc.fpp | 236 +++++++++--------------
 src/simulation/m_riemann_solver_lf.fpp   |  60 +-----
 src/simulation/m_riemann_state.fpp       |  77 ++++++++
 4 files changed, 186 insertions(+), 236 deletions(-)

diff --git a/src/simulation/m_riemann_solver_hll.fpp b/src/simulation/m_riemann_solver_hll.fpp
index e235a1c0e4..7752f8c813 100644
--- a/src/simulation/m_riemann_solver_hll.fpp
+++ b/src/simulation/m_riemann_solver_hll.fpp
@@ -89,14 +89,14 @@ contains
         real(wp)                  :: vel_L_tmp, vel_R_tmp
         real(wp)                  :: Ms_L, Ms_R, pres_SL, pres_SR
         real(wp)                  :: alpha_L_sum, alpha_R_sum
-        real(wp)                  :: zcoef, pcorr   !< low Mach number correction
+        real(wp)                  :: zcoef, pcorr  !< low Mach number correction
         type(riemann_states)      :: c_fast, pres_mag
         type(riemann_states_vec3) :: B
-        type(riemann_states)      :: Ga             !< Gamma (Lorentz factor)
+        type(riemann_states)      :: Ga            !< Gamma (Lorentz factor)
         type(riemann_states)      :: vdotB, B2
-        type(riemann_states_vec3) :: b4             !< 4-magnetic field components (spatial: b4x, b4y, b4z)
-        type(riemann_states_vec3) :: cm             !< Conservative momentum variables
-        integer                   :: i, j, k, l, q  !< Generic loop iterators
+        type(riemann_states_vec3) :: b4            !< 4-magnetic field components (spatial: b4x, b4y, b4z)
+        type(riemann_states_vec3) :: cm            !< Conservative momentum variables
+        integer                   :: i, j, k, l    !< Generic loop iterators
         ! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions
 
         call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, &
@@ -111,9 +111,9 @@ contains
             #:set SV = STENCIL_VAR
             #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs)
             if (norm_dir == ${NORM_DIR}$) then
-                $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, q, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, &
-                                    & alpha_R, tau_e_L, tau_e_R, Re_L, Re_R, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, &
-                                    & Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, &
+                $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, &
+                                    & tau_e_L, tau_e_R, Re_L, Re_R, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, &
+                                    & Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, &
                                     & pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp, rho_L, rho_R, &
                                     & pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, T_L, T_R, &
                                     & Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, gamma_L, &
@@ -197,37 +197,12 @@ contains
                                 alpha_R = alpha_R/max(alpha_R_sum, sgm_eps)
                             end if
 
-                            $:GPU_LOOP(parallelism='[seq]')
-                            do i = 1, num_fluids
-                                rho_L = rho_L + alpha_rho_L(i)
-                                gamma_L = gamma_L + alpha_L(i)*gammas(i)
-                                pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i)
-                                qv_L = qv_L + alpha_rho_L(i)*qvs(i)
-
-                                rho_R = rho_R + alpha_rho_R(i)
-                                gamma_R = gamma_R + alpha_R(i)*gammas(i)
-                                pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i)
-                                qv_R = qv_R + alpha_rho_R(i)*qvs(i)
-                            end do
+                            call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_L, rho_L, gamma_L, pi_inf_L, qv_L)
+                            call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_R, rho_R, gamma_R, pi_inf_R, qv_R)
 
                             if (viscous) then
-                                $:GPU_LOOP(parallelism='[seq]')
-                                do i = 1, 2
-                                    Re_L(i) = dflt_real
-                                    Re_R(i) = dflt_real
-
-                                    if (Re_size(i) > 0) Re_L(i) = 0._wp
-                                    if (Re_size(i) > 0) Re_R(i) = 0._wp
-
-                                    $:GPU_LOOP(parallelism='[seq]')
-                                    do q = 1, Re_size(i)
-                                        Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i)
-                                        Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i)
-                                    end do
-
-                                    Re_L(i) = 1._wp/max(Re_L(i), sgm_eps)
-                                    Re_R(i) = 1._wp/max(Re_R(i), sgm_eps)
-                                end do
+                                call s_compute_interface_reynolds(alpha_L, Re_L)
+                                call s_compute_interface_reynolds(alpha_R, Re_R)
                             end if
 
                             if (chemistry) then
diff --git a/src/simulation/m_riemann_solver_hllc.fpp b/src/simulation/m_riemann_solver_hllc.fpp
index f8ed19379e..5418d5a58a 100644
--- a/src/simulation/m_riemann_solver_hllc.fpp
+++ b/src/simulation/m_riemann_solver_hllc.fpp
@@ -47,10 +47,13 @@ contains
         #:if not MFC_CASE_OPTIMIZATION and USING_AMD
             real(wp), dimension(3) :: alpha_rho_L, alpha_rho_R
             real(wp), dimension(3) :: alpha_L, alpha_R
+            real(wp), dimension(3) :: alpha_lim_L, alpha_lim_R
             real(wp), dimension(3) :: vel_L, vel_R
         #:else
             real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R
             real(wp), dimension(num_fluids) :: alpha_L, alpha_R
+            !> Post-limiter volume fractions (alpha_L/R retain the pre-limiter loads used downstream)
+            real(wp), dimension(num_fluids) :: alpha_lim_L, alpha_lim_R
             real(wp), dimension(num_dims)   :: vel_L, vel_R
         #:endif
 
@@ -117,8 +120,8 @@ contains
         real(wp) :: rho_Star, E_Star, p_Star, p_K_Star, vel_K_star
         real(wp) :: pres_SL, pres_SR, Ms_L, Ms_R
         real(wp) :: flux_ene_e
-        real(wp) :: zcoef, pcorr           !< low Mach number correction
-        integer  :: Re_max, i, j, k, l, q  !< Generic loop iterators
+        real(wp) :: zcoef, pcorr   !< low Mach number correction
+        integer  :: i, j, k, l, q  !< Generic loop iterators
         ! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions
 
         call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, &
@@ -138,16 +141,16 @@ contains
                 ! 6-EQUATION MODEL WITH HLLC HLLC star-state flux with contact wave speed s_S
                 if (model_eqns == model_eqns_6eq) then
                     ! 6-equation model (model_eqns=3): separate phasic internal energies
-                    $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, q, vel_L, vel_R, Re_L, Re_R, alpha_L, alpha_R, Ys_L, &
-                                        & Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, Yi_avg, Phi_avg, h_iL, h_iR, &
-                                        & h_avg_2, tau_e_L, tau_e_R, flux_ene_e, xi_field_L, xi_field_R, pcorr, zcoef, rho_L, &
-                                        & rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, &
-                                        & T_L, T_R, Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, &
-                                        & Gamm_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, G_L, G_R, &
-                                        & rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, &
-                                        & vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, &
-                                        & alpha_R_sum, rho_Star, E_Star, p_Star, p_K_Star, vel_K_star, s_L, s_R, s_M, s_P, s_S, &
-                                        & xi_M, xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP]')
+                    $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, vel_L, vel_R, Re_L, Re_R, alpha_L, alpha_R, &
+                                        & alpha_rho_L, alpha_rho_R, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, &
+                                        & Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, tau_e_L, tau_e_R, flux_ene_e, xi_field_L, &
+                                        & xi_field_R, pcorr, zcoef, rho_L, rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, &
+                                        & Cv_avg, T_avg, eps, c_sum_Yi_Phi, T_L, T_R, Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, &
+                                        & Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, &
+                                        & qv_R, qv_avg, c_L, c_R, G_L, G_R, rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, &
+                                        & vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, &
+                                        & alpha_L_sum, alpha_R_sum, rho_Star, E_Star, p_Star, p_K_Star, vel_K_star, s_L, s_R, &
+                                        & s_M, s_P, s_S, xi_M, xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP]')
                     do l = ${Z_BND}$%beg, ${Z_BND}$%end
                         do k = ${Y_BND}$%beg, ${Y_BND}$%end
                             do j = ${X_BND}$%beg, ${X_BND}$%end
@@ -210,36 +213,20 @@ contains
 
                                 $:GPU_LOOP(parallelism='[seq]')
                                 do i = 1, num_fluids
-                                    rho_L = rho_L + qL_prim_rsx_vf(${SF('')}$, i)
-                                    gamma_L = gamma_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*gammas(i)
-                                    pi_inf_L = pi_inf_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*pi_infs(i)
-                                    qv_L = qv_L + qL_prim_rsx_vf(${SF('')}$, i)*qvs(i)
-
-                                    rho_R = rho_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)
-                                    gamma_R = gamma_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*gammas(i)
-                                    pi_inf_R = pi_inf_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*pi_infs(i)
-                                    qv_R = qv_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)*qvs(i)
-
+                                    alpha_rho_L(i) = qL_prim_rsx_vf(${SF('')}$, i)
+                                    alpha_rho_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, i)
                                     alpha_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%adv%beg + i - 1)
                                     alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%adv%beg + i - 1)
                                 end do
 
+                                call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_L, rho_L, gamma_L, pi_inf_L, &
+                                                                     & qv_L)
+                                call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_R, rho_R, gamma_R, pi_inf_R, &
+                                                                     & qv_R)
+
                                 if (viscous) then
-                                    $:GPU_LOOP(parallelism='[seq]')
-                                    do i = 1, 2
-                                        Re_L(i) = dflt_real
-                                        Re_R(i) = dflt_real
-                                        if (Re_size(i) > 0) Re_L(i) = 0._wp
-                                        if (Re_size(i) > 0) Re_R(i) = 0._wp
-                                        $:GPU_LOOP(parallelism='[seq]')
-                                        do q = 1, Re_size(i)
-                                            Re_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + Re_idx(i, q))/Res_gs(i, q) + Re_L(i)
-                                            Re_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + Re_idx(i, q))/Res_gs(i, &
-                                                 & q) + Re_R(i)
-                                        end do
-                                        Re_L(i) = 1._wp/max(Re_L(i), sgm_eps)
-                                        Re_R(i) = 1._wp/max(Re_R(i), sgm_eps)
-                                    end do
+                                    call s_compute_interface_reynolds(alpha_L, Re_L)
+                                    call s_compute_interface_reynolds(alpha_R, Re_R)
                                 end if
 
                                 E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L
@@ -596,18 +583,10 @@ contains
                                     alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)
                                 end do
 
-                                $:GPU_LOOP(parallelism='[seq]')
-                                do i = 1, num_fluids
-                                    rho_L = rho_L + alpha_rho_L(i)
-                                    gamma_L = gamma_L + alpha_L(i)*gammas(i)
-                                    pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i)
-                                    qv_L = qv_L + alpha_rho_L(i)*qvs(i)
-
-                                    rho_R = rho_R + alpha_rho_R(i)
-                                    gamma_R = gamma_R + alpha_R(i)*gammas(i)
-                                    pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i)
-                                    qv_R = qv_R + alpha_rho_R(i)*qvs(i)
-                                end do
+                                call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_L, rho_L, gamma_L, pi_inf_L, &
+                                                                     & qv_L)
+                                call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_R, rho_R, gamma_R, pi_inf_R, &
+                                                                     & qv_R)
 
                                 pres_L = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E)
                                 pres_R = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E)
@@ -740,12 +719,10 @@ contains
                                         end do
                                         ! Recalculating the radial momentum geometric source flux
                                         flux_gsrc_rsx_vf(${SF('')}$, &
-                                                         & eqn_idx%cont%end + dir_idx(1)) = xi_M*(rho_L*(vel_L(dir_idx(1)) &
-                                                         & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp &
-                                                         & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) &
-                                                         & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) &
-                                                         & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) &
-                                                         & *vel_R(dir_idx(1))) - vel_R(dir_idx(1)))))
+                                                         & eqn_idx%cont%end + dir_idx(1)) &
+                                                         & = f_compute_hllc_star_momentum_flux(rho_L, rho_R, vel_L(dir_idx(1)), &
+                                                         & vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, xi_R, xi_M, xi_P, &
+                                                         & dir_flg(dir_idx(1)))
                                         ! Geometrical source of the void fraction(s) is zero
                                         $:GPU_LOOP(parallelism='[seq]')
                                         do i = eqn_idx%adv%beg, eqn_idx%adv%end
@@ -760,12 +737,9 @@ contains
                                             flux_gsrc_rsx_vf(${SF('')}$, i) = 0._wp
                                         end do
                                         flux_gsrc_rsx_vf(${SF('')}$, &
-                                                         & eqn_idx%mom%beg + 1) = -xi_M*(rho_L*(vel_L(dir_idx(1))*vel_L(dir_idx(1) &
-                                                         & ) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) &
-                                                         & *vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) &
-                                                         & - xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) &
-                                                         & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) &
-                                                         & *vel_R(dir_idx(1))) - vel_R(dir_idx(1)))))
+                                                         & eqn_idx%mom%beg + 1) = -f_compute_hllc_star_momentum_flux(rho_L, &
+                                                         & rho_R, vel_L(dir_idx(1)), vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, &
+                                                         & xi_R, xi_M, xi_P, dir_flg(dir_idx(1)))
                                         flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%mom%end) = flux_rsx_vf(${SF('')}$, eqn_idx%mom%beg + 1)
                                     end if
                                 #:endif
@@ -776,13 +750,13 @@ contains
                 else if (model_eqns == model_eqns_5eq .and. bubbles_euler) then
                     ! 5-equation model with Euler-Euler bubble dynamics
                     $:GPU_PARALLEL_LOOP(collapse=3, private='[i, q, R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, &
-                                        & vel_R, rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, Re_L, Re_R, pcorr, zcoef, rho_L, &
-                                        & rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, &
-                                        & qv_R, qv_avg, c_L, c_R, c_avg, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, vel_R_tmp, &
-                                        & Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, s_L, s_R, s_M, s_P, s_S, xi_M, &
-                                        & xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP, nbub_L, nbub_R, PbwR3Lbar, PbwR3Rbar, &
-                                        & R3Lbar, R3Rbar, R3V2Lbar, R3V2Rbar, Ys_L, Ys_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, &
-                                        & Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2]')
+                                        & vel_R, rho_avg, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, h_avg, gamma_avg, Re_L, &
+                                        & Re_R, pcorr, zcoef, rho_L, rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, gamma_L, gamma_R, &
+                                        & pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, c_avg, vel_L_rms, vel_R_rms, &
+                                        & vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, &
+                                        & alpha_R_sum, s_L, s_R, s_M, s_P, s_S, xi_M, xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, &
+                                        & xi_PP, nbub_L, nbub_R, PbwR3Lbar, PbwR3Rbar, R3Lbar, R3Rbar, R3V2Lbar, R3V2Rbar, Ys_L, &
+                                        & Ys_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2]')
                     do l = ${Z_BND}$%beg, ${Z_BND}$%end
                         do k = ${Y_BND}$%beg, ${Y_BND}$%end
                             do j = ${X_BND}$%beg, ${X_BND}$%end
@@ -794,6 +768,8 @@ contains
 
                                 $:GPU_LOOP(parallelism='[seq]')
                                 do i = 1, num_fluids
+                                    alpha_rho_L(i) = qL_prim_rsx_vf(${SF('')}$, i)
+                                    alpha_rho_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, i)
                                     alpha_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)
                                     alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)
                                 end do
@@ -810,29 +786,15 @@ contains
 
                                 ! Retain this in the refactor
                                 if (mpp_lim .and. (num_fluids > 2)) then
-                                    $:GPU_LOOP(parallelism='[seq]')
-                                    do i = 1, num_fluids
-                                        rho_L = rho_L + qL_prim_rsx_vf(${SF('')}$, i)
-                                        gamma_L = gamma_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*gammas(i)
-                                        pi_inf_L = pi_inf_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*pi_infs(i)
-                                        qv_L = qv_L + qL_prim_rsx_vf(${SF('')}$, i)*qvs(i)
-                                        rho_R = rho_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)
-                                        gamma_R = gamma_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*gammas(i)
-                                        pi_inf_R = pi_inf_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*pi_infs(i)
-                                        qv_R = qv_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)*qvs(i)
-                                    end do
+                                    call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_L, rho_L, gamma_L, &
+                                                                         & pi_inf_L, qv_L)
+                                    call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_R, rho_R, gamma_R, &
+                                                                         & pi_inf_R, qv_R)
                                 else if (num_fluids > 2) then
-                                    $:GPU_LOOP(parallelism='[seq]')
-                                    do i = 1, num_fluids - 1
-                                        rho_L = rho_L + qL_prim_rsx_vf(${SF('')}$, i)
-                                        gamma_L = gamma_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*gammas(i)
-                                        pi_inf_L = pi_inf_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*pi_infs(i)
-                                        qv_L = qv_L + qL_prim_rsx_vf(${SF('')}$, i)*qvs(i)
-                                        rho_R = rho_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)
-                                        gamma_R = gamma_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*gammas(i)
-                                        pi_inf_R = pi_inf_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*pi_infs(i)
-                                        qv_R = qv_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)*qvs(i)
-                                    end do
+                                    call s_accumulate_mixture_properties(num_fluids - 1, alpha_rho_L, alpha_L, rho_L, gamma_L, &
+                                                                         & pi_inf_L, qv_L)
+                                    call s_accumulate_mixture_properties(num_fluids - 1, alpha_rho_R, alpha_R, rho_R, gamma_R, &
+                                                                         & pi_inf_R, qv_R)
                                 else
                                     rho_L = qL_prim_rsx_vf(${SF('')}$, 1)
                                     gamma_L = gammas(1)
@@ -1135,12 +1097,10 @@ contains
                                         end do
                                         ! Recalculating the radial momentum geometric source flux
                                         flux_gsrc_rsx_vf(${SF('')}$, &
-                                                         & eqn_idx%cont%end + dir_idx(1)) = xi_M*(rho_L*(vel_L(dir_idx(1)) &
-                                                         & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp &
-                                                         & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) &
-                                                         & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) &
-                                                         & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) &
-                                                         & *vel_R(dir_idx(1))) - vel_R(dir_idx(1)))))
+                                                         & eqn_idx%cont%end + dir_idx(1)) &
+                                                         & = f_compute_hllc_star_momentum_flux(rho_L, rho_R, vel_L(dir_idx(1)), &
+                                                         & vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, xi_R, xi_M, xi_P, &
+                                                         & dir_flg(dir_idx(1)))
                                         ! Geometrical source of the void fraction(s) is zero
                                         $:GPU_LOOP(parallelism='[seq]')
                                         do i = eqn_idx%adv%beg, eqn_idx%adv%end
@@ -1156,12 +1116,9 @@ contains
                                         end do
 
                                         flux_gsrc_rsx_vf(${SF('')}$, &
-                                                         & eqn_idx%mom%beg + 1) = -xi_M*(rho_L*(vel_L(dir_idx(1))*vel_L(dir_idx(1) &
-                                                         & ) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) &
-                                                         & *vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) &
-                                                         & - xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) &
-                                                         & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) &
-                                                         & *vel_R(dir_idx(1))) - vel_R(dir_idx(1)))))
+                                                         & eqn_idx%mom%beg + 1) = -f_compute_hllc_star_momentum_flux(rho_L, &
+                                                         & rho_R, vel_L(dir_idx(1)), vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, &
+                                                         & xi_R, xi_M, xi_P, dir_flg(dir_idx(1)))
                                         flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%mom%end) = flux_rsx_vf(${SF('')}$, eqn_idx%mom%beg + 1)
                                     end if
                                 #:endif
@@ -1171,15 +1128,15 @@ contains
                     $:END_GPU_PARALLEL_LOOP()
                 else
                     ! 5-equation model (model_eqns=2): mixture total energy, volume fraction advection
-                    $:GPU_PARALLEL_LOOP(collapse=3, private='[Re_max, i, q, T_L, T_R, vel_L_rms, vel_R_rms, pres_L, pres_R, &
-                                        & rho_L, gamma_L, pi_inf_L, qv_L, rho_R, gamma_R, pi_inf_R, qv_R, alpha_L_sum, &
-                                        & alpha_R_sum, E_L, E_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, &
-                                        & Gamm_R, Y_L, Y_R, H_L, H_R, qv_avg, rho_avg, gamma_avg, H_avg, c_L, c_R, c_avg, s_P, &
-                                        & s_M, xi_P, xi_M, xi_L, xi_R, xi_L_m1, xi_R_m1, Ms_L, Ms_R, pres_SL, pres_SR, vel_L, &
-                                        & vel_R, Re_L, Re_R, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, &
-                                        & vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, &
-                                        & tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, G_L, &
-                                        & G_R]', copyin='[is1, is2, is3]')
+                    $:GPU_PARALLEL_LOOP(collapse=3, private='[i, T_L, T_R, vel_L_rms, vel_R_rms, pres_L, pres_R, rho_L, gamma_L, &
+                                        & pi_inf_L, qv_L, rho_R, gamma_R, pi_inf_R, qv_R, alpha_L_sum, alpha_R_sum, E_L, E_R, &
+                                        & MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, Y_L, Y_R, H_L, &
+                                        & H_R, qv_avg, rho_avg, gamma_avg, H_avg, c_L, c_R, c_avg, s_P, s_M, xi_P, xi_M, xi_L, &
+                                        & xi_R, xi_L_m1, xi_R_m1, Ms_L, Ms_R, pres_SL, pres_SR, vel_L, vel_R, Re_L, Re_R, &
+                                        & alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, alpha_lim_L, alpha_lim_R, s_L, s_R, s_S, &
+                                        & vel_avg_rms, pcorr, zcoef, vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, &
+                                        & Gamma_iR, Cp_iL, Cp_iR, tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg, Phi_avg, &
+                                        & h_iL, h_iR, h_avg_2, G_L, G_R]', copyin='[is1, is2, is3]')
                     do l = ${Z_BND}$%beg, ${Z_BND}$%end
                         do k = ${Y_BND}$%beg, ${Y_BND}$%end
                             do j = ${X_BND}$%beg, ${X_BND}$%end
@@ -1230,38 +1187,24 @@ contains
                                     end do
                                 end if
 
+                                ! Post-limiter loads for the mixture properties; alpha_L/R keep the pre-limiter loads used
+                                ! downstream
                                 $:GPU_LOOP(parallelism='[seq]')
                                 do i = 1, num_fluids
-                                    rho_L = rho_L + qL_prim_rsx_vf(${SF('')}$, i)
-                                    gamma_L = gamma_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*gammas(i)
-                                    pi_inf_L = pi_inf_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*pi_infs(i)
-                                    qv_L = qv_L + qL_prim_rsx_vf(${SF('')}$, i)*qvs(i)
-
-                                    rho_R = rho_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)
-                                    gamma_R = gamma_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*gammas(i)
-                                    pi_inf_R = pi_inf_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*pi_infs(i)
-                                    qv_R = qv_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)*qvs(i)
+                                    alpha_rho_L(i) = qL_prim_rsx_vf(${SF('')}$, i)
+                                    alpha_rho_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, i)
+                                    alpha_lim_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)
+                                    alpha_lim_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)
                                 end do
 
-                                Re_max = 0
-                                if (Re_size(1) > 0) Re_max = 1
-                                if (Re_size(2) > 0) Re_max = 2
+                                call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_lim_L, rho_L, gamma_L, &
+                                                                     & pi_inf_L, qv_L)
+                                call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_lim_R, rho_R, gamma_R, &
+                                                                     & pi_inf_R, qv_R)
 
                                 if (viscous) then
-                                    $:GPU_LOOP(parallelism='[seq]')
-                                    do i = 1, Re_max
-                                        Re_L(i) = 0._wp
-                                        Re_R(i) = 0._wp
-
-                                        $:GPU_LOOP(parallelism='[seq]')
-                                        do q = 1, Re_size(i)
-                                            Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i)
-                                            Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i)
-                                        end do
-
-                                        Re_L(i) = 1._wp/max(Re_L(i), sgm_eps)
-                                        Re_R(i) = 1._wp/max(Re_R(i), sgm_eps)
-                                    end do
+                                    call s_compute_interface_reynolds(alpha_L, Re_L)
+                                    call s_compute_interface_reynolds(alpha_R, Re_R)
                                 end if
 
                                 if (chemistry) then
@@ -1588,12 +1531,10 @@ contains
                                         end do
                                         ! Recalculating the radial momentum geometric source flux
                                         flux_gsrc_rsx_vf(${SF('')}$, &
-                                                         & eqn_idx%cont%end + dir_idx(1)) = xi_M*(rho_L*(vel_L(dir_idx(1)) &
-                                                         & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp &
-                                                         & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) &
-                                                         & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) &
-                                                         & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) &
-                                                         & *vel_R(dir_idx(1))) - vel_R(dir_idx(1)))))
+                                                         & eqn_idx%cont%end + dir_idx(1)) &
+                                                         & = f_compute_hllc_star_momentum_flux(rho_L, rho_R, vel_L(dir_idx(1)), &
+                                                         & vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, xi_R, xi_M, xi_P, &
+                                                         & dir_flg(dir_idx(1)))
                                         ! Geometrical source of the void fraction(s) is zero
                                         $:GPU_LOOP(parallelism='[seq]')
                                         do i = eqn_idx%adv%beg, eqn_idx%adv%end
@@ -1609,12 +1550,9 @@ contains
                                         end do
 
                                         flux_gsrc_rsx_vf(${SF('')}$, &
-                                                         & eqn_idx%mom%beg + 1) = -xi_M*(rho_L*(vel_L(dir_idx(1))*vel_L(dir_idx(1) &
-                                                         & ) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) &
-                                                         & *vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) &
-                                                         & - xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) &
-                                                         & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) &
-                                                         & *vel_R(dir_idx(1))) - vel_R(dir_idx(1)))))
+                                                         & eqn_idx%mom%beg + 1) = -f_compute_hllc_star_momentum_flux(rho_L, &
+                                                         & rho_R, vel_L(dir_idx(1)), vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, &
+                                                         & xi_R, xi_M, xi_P, dir_flg(dir_idx(1)))
                                         flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%mom%end) = flux_rsx_vf(${SF('')}$, eqn_idx%mom%beg + 1)
                                     end if
                                 #:endif
diff --git a/src/simulation/m_riemann_solver_lf.fpp b/src/simulation/m_riemann_solver_lf.fpp
index c3c3aaa959..e829edc125 100644
--- a/src/simulation/m_riemann_solver_lf.fpp
+++ b/src/simulation/m_riemann_solver_lf.fpp
@@ -95,7 +95,7 @@ contains
         type(riemann_states)      :: vdotB, B2
         type(riemann_states_vec3) :: b4              !< 4-magnetic field components (spatial: b4x, b4y, b4z)
         type(riemann_states_vec3) :: cm              !< Conservative momentum variables
-        integer                   :: i, j, k, l, q   !< Generic loop iterators
+        integer                   :: i, j, k, l      !< Generic loop iterators
         integer, dimension(3)     :: idx_right_phys  !< Physical (j,k,l) indices for right state.
         ! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions
 
@@ -111,9 +111,9 @@ contains
             #:set SV = STENCIL_VAR
             #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs)
             if (norm_dir == ${NORM_DIR}$) then
-                $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, q, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, &
-                                    & alpha_R, tau_e_L, tau_e_R, G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, &
-                                    & Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, &
+                $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, &
+                                    & tau_e_L, tau_e_R, G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, &
+                                    & Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, &
                                     & Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, &
                                     & vel_grad_L, vel_grad_R, idx_right_phys, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, &
                                     & vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, c_avg, pres_L, pres_R, &
@@ -197,37 +197,12 @@ contains
                                 alpha_R = alpha_R/max(alpha_R_sum, sgm_eps)
                             end if
 
-                            $:GPU_LOOP(parallelism='[seq]')
-                            do i = 1, num_fluids
-                                rho_L = rho_L + alpha_rho_L(i)
-                                gamma_L = gamma_L + alpha_L(i)*gammas(i)
-                                pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i)
-                                qv_L = qv_L + alpha_rho_L(i)*qvs(i)
-
-                                rho_R = rho_R + alpha_rho_R(i)
-                                gamma_R = gamma_R + alpha_R(i)*gammas(i)
-                                pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i)
-                                qv_R = qv_R + alpha_rho_R(i)*qvs(i)
-                            end do
+                            call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_L, rho_L, gamma_L, pi_inf_L, qv_L)
+                            call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_R, rho_R, gamma_R, pi_inf_R, qv_R)
 
                             if (viscous) then
-                                $:GPU_LOOP(parallelism='[seq]')
-                                do i = 1, 2
-                                    Re_L(i) = dflt_real
-                                    Re_R(i) = dflt_real
-
-                                    if (Re_size(i) > 0) Re_L(i) = 0._wp
-                                    if (Re_size(i) > 0) Re_R(i) = 0._wp
-
-                                    $:GPU_LOOP(parallelism='[seq]')
-                                    do q = 1, Re_size(i)
-                                        Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i)
-                                        Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i)
-                                    end do
-
-                                    Re_L(i) = 1._wp/max(Re_L(i), sgm_eps)
-                                    Re_R(i) = 1._wp/max(Re_R(i), sgm_eps)
-                                end do
+                                call s_compute_interface_reynolds(alpha_L, Re_L)
+                                call s_compute_interface_reynolds(alpha_R, Re_R)
                             end if
 
                             if (chemistry) then
@@ -645,23 +620,8 @@ contains
                             end do
                         end if
 
-                        $:GPU_LOOP(parallelism='[seq]')
-                        do i = 1, 2
-                            Re_L(i) = dflt_real
-                            Re_R(i) = dflt_real
-
-                            if (Re_size(i) > 0) Re_L(i) = 0._wp
-                            if (Re_size(i) > 0) Re_R(i) = 0._wp
-
-                            $:GPU_LOOP(parallelism='[seq]')
-                            do q = 1, Re_size(i)
-                                Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i)
-                                Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i)
-                            end do
-
-                            Re_L(i) = 1._wp/max(Re_L(i), sgm_eps)
-                            Re_R(i) = 1._wp/max(Re_R(i), sgm_eps)
-                        end do
+                        call s_compute_interface_reynolds(alpha_L, Re_L)
+                        call s_compute_interface_reynolds(alpha_R, Re_R)
 
                         if (shear_stress) then
                             $:GPU_LOOP(parallelism='[seq]')
diff --git a/src/simulation/m_riemann_state.fpp b/src/simulation/m_riemann_state.fpp
index 595f785d39..bde913675d 100644
--- a/src/simulation/m_riemann_state.fpp
+++ b/src/simulation/m_riemann_state.fpp
@@ -1016,6 +1016,83 @@ contains
 
     end subroutine s_calculate_bulk_stress_tensor
 
+    !> Accumulate the mixture density, specific heat ratio function, liquid stiffness function, and internal energy reference of one
+    !! Riemann state from its partial densities and volume fractions. The number of fluids is an explicit argument because the
+    !! 5-equation bubble model accumulates over num_fluids - 1 fluids.
+    subroutine s_accumulate_mixture_properties(nf, alpha_rho_K, alpha_K, rho_K, gamma_K, pi_inf_K, qv_K)
+
+        $:GPU_ROUTINE(function_name='s_accumulate_mixture_properties', parallelism='[seq]', cray_inline=True)
+
+        integer, intent(in)                 :: nf  !< Number of fluids to accumulate over
+        real(wp), dimension(nf), intent(in) :: alpha_rho_K, alpha_K
+        real(wp), intent(out)               :: rho_K, gamma_K, pi_inf_K, qv_K
+        integer                             :: i   !< Loop iterator over fluids
+
+        rho_K = 0._wp
+        gamma_K = 0._wp
+        pi_inf_K = 0._wp
+        qv_K = 0._wp
+
+        $:GPU_LOOP(parallelism='[seq]')
+        do i = 1, nf
+            rho_K = rho_K + alpha_rho_K(i)
+            gamma_K = gamma_K + alpha_K(i)*gammas(i)
+            pi_inf_K = pi_inf_K + alpha_K(i)*pi_infs(i)
+            qv_K = qv_K + alpha_rho_K(i)*qvs(i)
+        end do
+
+    end subroutine s_accumulate_mixture_properties
+
+    !> Compute the shear and volume Reynolds numbers of one Riemann state by inverse-weighting the fluid Reynolds numbers with the
+    !! volume fractions.
+    subroutine s_compute_interface_reynolds(alpha_K, Re_K)
+
+        $:GPU_ROUTINE(function_name='s_compute_interface_reynolds', parallelism='[seq]', cray_inline=True)
+
+        #:if not MFC_CASE_OPTIMIZATION and USING_AMD
+            real(wp), dimension(3), intent(in) :: alpha_K
+        #:else
+            real(wp), dimension(num_fluids), intent(in) :: alpha_K
+        #:endif
+        real(wp), dimension(2), intent(out) :: Re_K
+        integer                             :: i, q  !< Loop iterators
+
+        $:GPU_LOOP(parallelism='[seq]')
+        do i = 1, 2
+            Re_K(i) = dflt_real
+
+            if (Re_size(i) > 0) Re_K(i) = 0._wp
+
+            $:GPU_LOOP(parallelism='[seq]')
+            do q = 1, Re_size(i)
+                Re_K(i) = alpha_K(Re_idx(i, q))/Res_gs(i, q) + Re_K(i)
+            end do
+
+            Re_K(i) = 1._wp/max(Re_K(i), sgm_eps)
+        end do
+
+    end subroutine s_compute_interface_reynolds
+
+    !> Compute the advective part of the HLLC star-state momentum flux in the wave-normal direction (pressure excluded), used to
+    !! assemble the geometrical source flux of the cylindrical and azimuthal sweeps.
+    function f_compute_hllc_star_momentum_flux(rho_L, rho_R, vel_L_norm, vel_R_norm, s_M, s_P, s_S, xi_L, xi_R, xi_M, xi_P, &
+        & dir_flg_norm) result(flux_mom)
+
+        $:GPU_ROUTINE(function_name='f_compute_hllc_star_momentum_flux', parallelism='[seq]', cray_inline=True)
+
+        real(wp), intent(in) :: rho_L, rho_R            !< Left and right densities
+        real(wp), intent(in) :: vel_L_norm, vel_R_norm  !< Left and right wave-normal velocities
+        real(wp), intent(in) :: s_M, s_P, s_S           !< Clamped left/right and contact wave speeds
+        real(wp), intent(in) :: xi_L, xi_R, xi_M, xi_P  !< Star-state compression factors and upwind selectors
+        real(wp), intent(in) :: dir_flg_norm            !< Direction flag of the wave-normal direction
+        real(wp)             :: flux_mom
+
+        flux_mom = xi_M*(rho_L*(vel_L_norm*vel_L_norm + s_M*(xi_L*(dir_flg_norm*s_S + (1._wp - dir_flg_norm)*vel_L_norm) &
+                         & - vel_L_norm))) + xi_P*(rho_R*(vel_R_norm*vel_R_norm + s_P*(xi_R*(dir_flg_norm*s_S + (1._wp &
+                         & - dir_flg_norm)*vel_R_norm) - vel_R_norm)))
+
+    end function f_compute_hllc_star_momentum_flux
+
     !> Deallocation and/or disassociation procedures that are needed to finalize the selected Riemann problem solver
     subroutine s_finalize_riemann_solver(flux_vf, flux_src_vf, flux_gsrc_vf, norm_dir)
 

From b26302b54567d73993eaf5e124a885d40d84e1b8 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Thu, 11 Jun 2026 16:56:49 -0400
Subject: [PATCH 3/7] src: unify hypoelastic interface energy into a shared
 helper; delete dead solver copies

Implements Task 2 of the Phase-3 Riemann decomposition plan per Spencer's rulings (2026-06-11) on the BLOCKED-ON-PHYSICS adjudication:

1. Energy gate: the helper gates on G > verysmall, not HLL's hard-coded 1000 floor; HLL's TODO asking for its removal is honored and deleted. This is a deliberate behavior change for soft-G (G in (verysmall, 1000]) hypoelastic runs; golden tests are unaffected (test shear moduli are 5e4/1e5).

2. Damage scaling: the (1 - damage) factor applies whenever cont_damage is on (HLL's existing behavior); the helper carries it.

3. Dead copies: the hypoelastic energy-loading blocks in HLLC (both instances) and LF are deleted, since case_validator.py:577 restricts hypoelasticity to the HLL solver. LF's G_L/G_R locals became unused and are removed from the declarations and the private list; HLLC's tau_e/G locals remain in use by its hyperelastic and elastic flux/wave-speed code.

4. Wave-speed corrections are untouched in all solvers; the hyper-tau_e divergence is filed upstream separately.

The helper uses the shear_indices-based energy doubling (HLL's dimension-correct form, not the hard-coded i==2/4/5). The caller loads the SF-indexed stress and damage values and passes them in, keeping tau_e_L/R available for the stress-flux and wave-speed blocks; the mixture moduli G_L/G_R are intent(out) for the elastic wave speeds.
---
 src/simulation/m_riemann_solver_hll.fpp  | 37 +++++----------
 src/simulation/m_riemann_solver_hllc.fpp | 57 ------------------------
 src/simulation/m_riemann_solver_lf.fpp   | 48 ++++----------------
 src/simulation/m_riemann_state.fpp       | 48 +++++++++++++++++++-
 4 files changed, 67 insertions(+), 123 deletions(-)

diff --git a/src/simulation/m_riemann_solver_hll.fpp b/src/simulation/m_riemann_solver_hll.fpp
index 7752f8c813..d46a770fc1 100644
--- a/src/simulation/m_riemann_solver_hll.fpp
+++ b/src/simulation/m_riemann_solver_hll.fpp
@@ -75,6 +75,7 @@ contains
         real(wp)                  :: c_L, c_R
         real(wp), dimension(6)    :: tau_e_L, tau_e_R
         real(wp)                  :: G_L, G_R
+        real(wp)                  :: damage_L, damage_R
         real(wp), dimension(2)    :: Re_L, Re_R
         real(wp), dimension(3)    :: xi_field_L, xi_field_R
         real(wp)                  :: rho_avg
@@ -117,9 +118,9 @@ contains
                                     & pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp, rho_L, rho_R, &
                                     & pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, T_L, T_R, &
                                     & Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, gamma_L, &
-                                    & gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, G_L, G_R, rho_avg, H_avg, c_avg, &
-                                    & gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, vel_avg_rms, Ms_L, Ms_R, pres_SL, &
-                                    & pres_SR, alpha_L_sum, alpha_R_sum, flux_tau_L, flux_tau_R]', copyin='[norm_dir]')
+                                    & gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, G_L, G_R, damage_L, damage_R, &
+                                    & rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, vel_avg_rms, &
+                                    & Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, flux_tau_L, flux_tau_R]', copyin='[norm_dir]')
                 do l = ${Z_BND}$%beg, ${Z_BND}$%end
                     do k = ${Y_BND}$%beg, ${Y_BND}$%end
                         do j = ${X_BND}$%beg, ${X_BND}$%end
@@ -298,34 +299,20 @@ contains
 
                             ! elastic energy update
                             if (hypoelasticity) then
-                                G_L = 0._wp; G_R = 0._wp
-
                                 $:GPU_LOOP(parallelism='[seq]')
-                                do i = 1, num_fluids
-                                    G_L = G_L + alpha_L(i)*Gs_rs(i)
-                                    G_R = G_R + alpha_R(i)*Gs_rs(i)
+                                do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1
+                                    tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i)
+                                    tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i)
                                 end do
 
+                                damage_L = 0._wp; damage_R = 0._wp
                                 if (cont_damage) then
-                                    G_L = G_L*max((1._wp - qL_prim_rsx_vf(${SF('')}$, eqn_idx%damage)), 0._wp)
-                                    G_R = G_R*max((1._wp - qR_prim_rsx_vf(${SF('')}$, eqn_idx%damage)), 0._wp)
+                                    damage_L = qL_prim_rsx_vf(${SF('')}$, eqn_idx%damage)
+                                    damage_R = qR_prim_rsx_vf(${SF('')}$, eqn_idx%damage)
                                 end if
 
-                                $:GPU_LOOP(parallelism='[seq]')
-                                do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1
-                                    tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i)
-                                    tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i)
-                                    ! Elastic contribution to energy if G large enough TODO take out if statement if stable without
-                                    if ((G_L > 1000) .and. (G_R > 1000)) then
-                                        E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L)
-                                        E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R)
-                                        ! Double for shear stresses
-                                        if (any(eqn_idx%stress%beg - 1 + i == shear_indices)) then
-                                            E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L)
-                                            E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R)
-                                        end if
-                                    end if
-                                end do
+                                call s_compute_hypoelastic_interface_energy(num_fluids, alpha_L, alpha_R, damage_L, damage_R, &
+                                    & tau_e_L, tau_e_R, G_L, G_R, E_L, E_R)
                             end if
 
                             @:compute_average_state()
diff --git a/src/simulation/m_riemann_solver_hllc.fpp b/src/simulation/m_riemann_solver_hllc.fpp
index 5418d5a58a..55dea991ea 100644
--- a/src/simulation/m_riemann_solver_hllc.fpp
+++ b/src/simulation/m_riemann_solver_hllc.fpp
@@ -232,34 +232,6 @@ contains
                                 E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L
                                 E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R
 
-                                ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY
-                                if (hypoelasticity) then
-                                    $:GPU_LOOP(parallelism='[seq]')
-                                    do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1
-                                        tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i)
-                                        tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i)
-                                    end do
-                                    G_L = 0._wp; G_R = 0._wp
-                                    $:GPU_LOOP(parallelism='[seq]')
-                                    do i = 1, num_fluids
-                                        G_L = G_L + alpha_L(i)*Gs_rs(i)
-                                        G_R = G_R + alpha_R(i)*Gs_rs(i)
-                                    end do
-                                    $:GPU_LOOP(parallelism='[seq]')
-                                    do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1
-                                        ! Elastic contribution to energy if G large enough
-                                        if ((G_L > verysmall) .and. (G_R > verysmall)) then
-                                            E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L)
-                                            E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R)
-                                            ! Additional terms in 2D and 3D
-                                            if ((i == 2) .or. (i == 4) .or. (i == 5)) then
-                                                E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L)
-                                                E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R)
-                                            end if
-                                        end if
-                                    end do
-                                end if
-
                                 ! Hyperelastic stress contribution: strain energy added to total energy
                                 if (hyperelasticity) then
                                     $:GPU_LOOP(parallelism='[seq]')
@@ -1263,35 +1235,6 @@ contains
                                     H_R = (E_R + pres_R)/rho_R
                                 end if
 
-                                ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY
-                                if (hypoelasticity) then
-                                    $:GPU_LOOP(parallelism='[seq]')
-                                    do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1
-                                        tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i)
-                                        tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i)
-                                    end do
-                                    G_L = 0._wp
-                                    G_R = 0._wp
-                                    $:GPU_LOOP(parallelism='[seq]')
-                                    do i = 1, num_fluids
-                                        G_L = G_L + alpha_L(i)*Gs_rs(i)
-                                        G_R = G_R + alpha_R(i)*Gs_rs(i)
-                                    end do
-                                    $:GPU_LOOP(parallelism='[seq]')
-                                    do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1
-                                        ! Elastic contribution to energy if G large enough
-                                        if ((G_L > verysmall) .and. (G_R > verysmall)) then
-                                            E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L)
-                                            E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R)
-                                            ! Additional terms in 2D and 3D
-                                            if ((i == 2) .or. (i == 4) .or. (i == 5)) then
-                                                E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L)
-                                                E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R)
-                                            end if
-                                        end if
-                                    end do
-                                end if
-
                                 ! Hyperelastic stress contribution: strain energy added to total energy
                                 if (hyperelasticity) then
                                     $:GPU_LOOP(parallelism='[seq]')
diff --git a/src/simulation/m_riemann_solver_lf.fpp b/src/simulation/m_riemann_solver_lf.fpp
index e829edc125..ce2d6cd237 100644
--- a/src/simulation/m_riemann_solver_lf.fpp
+++ b/src/simulation/m_riemann_solver_lf.fpp
@@ -74,7 +74,6 @@ contains
         real(wp)                  :: qv_L, qv_R
         real(wp)                  :: c_L, c_R
         real(wp), dimension(6)    :: tau_e_L, tau_e_R
-        real(wp)                  :: G_L, G_R
         real(wp), dimension(2)    :: Re_L, Re_R
         real(wp), dimension(3)    :: xi_field_L, xi_field_R
         real(wp)                  :: rho_avg
@@ -112,14 +111,14 @@ contains
             #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs)
             if (norm_dir == ${NORM_DIR}$) then
                 $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, &
-                                    & tau_e_L, tau_e_R, G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, &
-                                    & Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, &
-                                    & Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, &
-                                    & vel_grad_L, vel_grad_R, idx_right_phys, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, &
-                                    & vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, c_avg, pres_L, pres_R, &
-                                    & rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, c_R, E_L, E_R, H_L, &
-                                    & H_R, ptilde_L, ptilde_R, s_M, s_P, xi_M, xi_P, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, &
-                                    & Cp_L, Cp_R, Cv_L, Cv_R, R_gas_L, R_gas_R, MW_L, MW_R, T_L, T_R, Y_L, Y_R]')
+                                    & tau_e_L, tau_e_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, &
+                                    & xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, &
+                                    & h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_grad_L, &
+                                    & vel_grad_R, idx_right_phys, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, &
+                                    & Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, c_avg, pres_L, pres_R, rho_L, rho_R, &
+                                    & gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, c_R, E_L, E_R, H_L, H_R, ptilde_L, &
+                                    & ptilde_R, s_M, s_P, xi_M, xi_P, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, Cp_L, Cp_R, Cv_L, &
+                                    & Cv_R, R_gas_L, R_gas_R, MW_L, MW_R, T_L, T_R, Y_L, Y_R]')
                 do l = ${Z_BND}$%beg, ${Z_BND}$%end
                     do k = ${Y_BND}$%beg, ${Y_BND}$%end
                         do j = ${X_BND}$%beg, ${X_BND}$%end
@@ -294,37 +293,6 @@ contains
                                 H_R = (E_R + pres_R)/rho_R
                             end if
 
-                            ! elastic energy update
-                            if (hypoelasticity) then
-                                G_L = 0._wp; G_R = 0._wp
-
-                                $:GPU_LOOP(parallelism='[seq]')
-                                do i = 1, num_fluids
-                                    G_L = G_L + alpha_L(i)*Gs_rs(i)
-                                    G_R = G_R + alpha_R(i)*Gs_rs(i)
-                                end do
-
-                                if (cont_damage) then
-                                    G_L = G_L*max((1._wp - qL_prim_rsx_vf(${SF('')}$, eqn_idx%damage)), 0._wp)
-                                    G_R = G_R*max((1._wp - qR_prim_rsx_vf(${SF('')}$, eqn_idx%damage)), 0._wp)
-                                end if
-
-                                do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1
-                                    tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i)
-                                    tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i)
-                                    ! Elastic contribution to energy if G large enough TODO take out if statement if stable without
-                                    if ((G_L > 1000) .and. (G_R > 1000)) then
-                                        E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L)
-                                        E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R)
-                                        ! Double for shear stresses
-                                        if (any(eqn_idx%stress%beg - 1 + i == shear_indices)) then
-                                            E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L)
-                                            E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R)
-                                        end if
-                                    end if
-                                end do
-                            end if
-
                             call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, vel_L_rms, 0._wp, c_L, &
                                                           & qv_L)
 
diff --git a/src/simulation/m_riemann_state.fpp b/src/simulation/m_riemann_state.fpp
index bde913675d..38e15ce995 100644
--- a/src/simulation/m_riemann_state.fpp
+++ b/src/simulation/m_riemann_state.fpp
@@ -11,7 +11,7 @@ module m_riemann_state
 
     use m_derived_types
     use m_global_parameters
-    use m_constants, only: riemann_solver_hll, riemann_solver_hlld
+    use m_constants, only: riemann_solver_hll, riemann_solver_hlld, verysmall
     use m_hb_function
 
     implicit none
@@ -1073,6 +1073,52 @@ contains
 
     end subroutine s_compute_interface_reynolds
 
+    !> Accumulate the hypoelastic stress contribution to the energies of the left and right Riemann states: mix the shear modulus
+    !! over the fluids, scale it by the continuum damage state when damage is modeled, and add the elastic energy of each stress
+    !! component (doubled for the shear components) when both mixture moduli are non-negligible. The elastic shear stresses are
+    !! loaded from the state buffers by the caller, which reuses them for the stress fluxes and elastic wave speeds.
+    subroutine s_compute_hypoelastic_interface_energy(nf, alpha_L, alpha_R, damage_L, damage_R, tau_e_L, tau_e_R, G_L, G_R, E_L, &
+        & E_R)
+
+        $:GPU_ROUTINE(function_name='s_compute_hypoelastic_interface_energy', parallelism='[seq]', cray_inline=True)
+
+        integer, intent(in)                 :: nf                  !< Number of fluids to mix the shear modulus over
+        real(wp), dimension(nf), intent(in) :: alpha_L, alpha_R    !< Left and right volume fractions
+        real(wp), intent(in)                :: damage_L, damage_R  !< Continuum damage states (referenced only when cont_damage)
+        real(wp), dimension(6), intent(in)  :: tau_e_L, tau_e_R    !< Left and right elastic shear stresses
+        real(wp), intent(out)               :: G_L, G_R            !< Left and right mixture shear moduli
+        real(wp), intent(inout)             :: E_L, E_R            !< Left and right state energies
+        integer                             :: i                   !< Loop iterator
+
+        G_L = 0._wp; G_R = 0._wp
+
+        $:GPU_LOOP(parallelism='[seq]')
+        do i = 1, nf
+            G_L = G_L + alpha_L(i)*Gs_rs(i)
+            G_R = G_R + alpha_R(i)*Gs_rs(i)
+        end do
+
+        if (cont_damage) then
+            G_L = G_L*max((1._wp - damage_L), 0._wp)
+            G_R = G_R*max((1._wp - damage_R), 0._wp)
+        end if
+
+        $:GPU_LOOP(parallelism='[seq]')
+        do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1
+            ! Elastic contribution to energy if G large enough
+            if ((G_L > verysmall) .and. (G_R > verysmall)) then
+                E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L)
+                E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R)
+                ! Double for shear stresses
+                if (any(eqn_idx%stress%beg - 1 + i == shear_indices)) then
+                    E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L)
+                    E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R)
+                end if
+            end if
+        end do
+
+    end subroutine s_compute_hypoelastic_interface_energy
+
     !> Compute the advective part of the HLLC star-state momentum flux in the wave-normal direction (pressure excluded), used to
     !! assemble the geometrical source flux of the cylindrical and azimuthal sweeps.
     function f_compute_hllc_star_momentum_flux(rho_L, rho_R, vel_L_norm, vel_R_norm, s_M, s_P, s_S, xi_L, xi_R, xi_M, xi_P, &

From f1ffcbead7a63735138a58aa23de21643ca35e2d Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Thu, 11 Jun 2026 17:43:50 -0400
Subject: [PATCH 4/7] docs: document the GPU_ROUTINE helper pattern; record
 Phase-3 completion

---
 docs/documentation/gpuParallelization.md | 57 ++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md
index 489ad42b12..4830b480f0 100644
--- a/docs/documentation/gpuParallelization.md
+++ b/docs/documentation/gpuParallelization.md
@@ -607,6 +607,63 @@ Does not do anything for OpenMP currently
 
 ------------------------------------------------------------------------------------------
 
+## Writing GPU_ROUTINE Device Helpers
+
+`GPU_ROUTINE` marks a subroutine or function as callable from within a GPU kernel
+(an OpenACC `routine` or OpenMP `declare target` directive). The standard idiom for
+pure, sequential per-thread helpers is:
+
+```fortran
+subroutine s_accumulate_mixture_properties(nf, alpha_rho_K, alpha_K, rho_K, gamma_K, pi_inf_K, qv_K)
+
+    $:GPU_ROUTINE(function_name='s_accumulate_mixture_properties', parallelism='[seq]', cray_inline=True)
+
+    integer, intent(in)                 :: nf
+    real(wp), dimension(nf), intent(in) :: alpha_rho_K, alpha_K
+    real(wp), intent(out)               :: rho_K, gamma_K, pi_inf_K, qv_K
+    ...
+end subroutine s_accumulate_mixture_properties
+```
+
+**When to use it.** Extract a block into a `GPU_ROUTINE` helper when:
+
+- It does roughly 50–100 or more FLOPs of work per call, enough to amortize the
+  call overhead on device. Below that threshold, the helper is justified only when
+  the same block appears verbatim in three or more callers (duplication removal).
+- It is called from inside a `GPU_PARALLEL_LOOP` region — ``parallelism='[seq]'`` is
+  correct for sequential per-thread work; the surrounding loop keeps the parallelism.
+
+**Key idioms.**
+
+- Always ``parallelism='[seq]'`` and `cray_inline=True` for these helpers. The Cray
+  compiler does not inline cross-file `routine` calls without `cray_inline`; without
+  it, performance collapses silently on that backend.
+- `function_name=` is required when `cray_inline=True` (the Cray inline directive
+  needs the explicit name).
+- The `GPU_ROUTINE` directive lives in the *definition*, not the call site. The
+  caller needs no annotation beyond being inside a `GPU_PARALLEL_LOOP`.
+
+**Caller-loads, helper-computes.** Callers do all coordinate-indexed array loads
+from the global state arrays (`qL_rs_vf`, `qR_rs_vf`, etc.) before the call; the
+helper receives only scalars or small arrays with explicit-shape dimensioning. This
+is required because the `SF` indexing lambda used in solver loops is defined locally
+inside each solver's `#:for NORM_DIR` block and cannot be referenced from a helper.
+
+**AMD case-opt compatibility.** Under `--case-optimization` with the AMD backend,
+arrays that are sized by runtime parameters at compile time must be declared with an
+explicit constant bound. Use an explicit `n` argument (e.g., `integer, intent(in) ::
+nf`) and dimension helpers as `dimension(nf)` rather than `dimension(num_fluids)`.
+See `s_compute_interface_reynolds` in `src/simulation/m_riemann_state.fpp` for the
+`#:if not MFC_CASE_OPTIMIZATION and USING_AMD` guard pattern where the caller-side
+dimensioning differs.
+
+**Declare scoping.** The `GPU_ROUTINE` directive must appear in the source file
+that defines the routine. Helpers added to `m_riemann_state.fpp` are automatically
+in scope for every solver module that `use`s it — no additional declare-target
+annotations are needed at call sites.
+
+------------------------------------------------------------------------------------------
+
 # Debugging Tools and Tips for GPUs
 
 ## Compiler agnostic tools

From a577f15e05557d17f4e26649bead4ed0ab22a5fd Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Thu, 11 Jun 2026 19:53:22 -0400
Subject: [PATCH 5/7] build: fix per-target loop and vestigial lines in
 MFCTargets

Flagged by Copilot review on #1572; all pre-existing on master, carried verbatim by the CMakeLists split. Unified-memory flags (NVHPC managedalloc pair, FRONTIER_UNIFIED x2) now apply to a_target so the NVHPC two-pass IPO lib target gets them too; the set(ENV{OMP_TARGET_OFFLOAD} [MANDATORY]) line is deleted outright - set(ENV) only affects the configure-time process so it never influenced built binaries, and its bracketed value was a literal besides; a leftover debug message(STATUS) removed.
---
 cmake/MFCTargets.cmake | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/cmake/MFCTargets.cmake b/cmake/MFCTargets.cmake
index 5242fe9fd8..3e848aca4f 100644
--- a/cmake/MFCTargets.cmake
+++ b/cmake/MFCTargets.cmake
@@ -74,7 +74,6 @@ exit 0
 
     foreach (a_target ${IPO_TARGETS})
         set_target_properties(${a_target} PROPERTIES Fortran_PREPROCESS ON)
-	    message(STATUS ${CMAKE_Fortran_COMPILER_ID})
 
         target_include_directories(${a_target} PRIVATE
             "${CMAKE_SOURCE_DIR}/src/common"
@@ -161,7 +160,6 @@ exit 0
                 if (NOT TARGET OpenMP::OpenMP_Fortran)
                     message(FATAL_ERROR "OpenMP + Fortran is unsupported.")
                 endif()
-                set(ENV{OMP_TARGET_OFFLOAD} [MANDATORY])
                 # target_link_libraries(${a_target} PRIVATE OpenMP::OpenMP_Fortran)
                 target_compile_definitions(${a_target} PRIVATE MFC_OpenMP MFC_GPU)
 
@@ -209,11 +207,11 @@ exit 0
 
                 # GH-200 Unified Memory Support
                 if (MFC_Unified)
-                    target_compile_options(${ARGS_TARGET}
+                    target_compile_options(${a_target}
                         PRIVATE -gpu=mem:unified:managedalloc -cuda
                     )
                     # "This option must appear in both the compile and link lines" -- NVHPC Docs
-                    target_link_options(${ARGS_TARGET}
+                    target_link_options(${a_target}
                         PRIVATE -gpu=mem:unified:managedalloc -cuda
                     )
                 endif()
@@ -226,7 +224,7 @@ exit 0
             elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
                 # Frontier Unified Memory Support
                 if (MFC_Unified)
-                    target_compile_options(${ARGS_TARGET}
+                    target_compile_options(${a_target}
                         PRIVATE -DFRONTIER_UNIFIED)
                 endif()
 
@@ -239,7 +237,7 @@ exit 0
             elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang")
 
                 if (MFC_Unified)
-                    target_compile_options(${ARGS_TARGET}
+                    target_compile_options(${a_target}
                         PRIVATE -DFRONTIER_UNIFIED)
                 endif()
 

From fc7f770c719f6668b09b97a6e6a8bfcaf47cc28b Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Fri, 12 Jun 2026 00:07:53 -0400
Subject: [PATCH 6/7] src: delete the remaining validator-dead hypoelastic
 blocks in LF and HLLC; doc fixes

Guard inventory (every tau_e/G consumer in the two files was classified by its
exact guard before deletion):
- LF: hypoelastic momentum flux (else if hypoelasticity), hypoelastic energy
  flux (else if hypoelasticity), stress evolution flux (if hypoelasticity),
  cylindrical tau_sigmasigma source (if cyl_coord .and. hypoelasticity) - all
  deleted.
- HLLC: stress evolution flux (if hypoelasticity), both 6-eqn and 5-eqn copies
  - deleted. The hyperelasticity-gated tau_e/G/xi_field loads and the
  elasticity-gated wave speeds and elastic momentum/energy fluxes are
  untouched (live under hyperelasticity, which is legal with all solvers).
- HLL is untouched.

case_validator.py prohibits hypoelasticity with any solver but HLL, so these
blocks were unreachable; after PR #1572 deleted the hypoelastic energy blocks
they consumed tau_e_L/R that nothing in these files loads. Per the maintainer
ruling: prefer deleting code - lifting the restriction later means adding the
code back deliberately.

Unused locals removed from LF: tau_e_L/R and flux_tau_L/R (zero refs after
deletion) and xi_field_L/R (orphaned since the solver split, never referenced
in LF) - declarations and GPU private-list entries. HLLC declarations are
unchanged (tau_e/G remain live on the hyper/elasticity paths).

GPU census: LF -7 acc seq-loop directives (2 deleted seq loops x 3 sweep
copies + 1 cyl-only), HLLC -6 (1 seq loop x 3 copies x 2 blocks), OMP path
unchanged (seq loops emit nothing under OMP); all deltas trace to deleted
blocks.

Doc fixes: rescope the cray_inline guidance to cross-module helpers only,
describe the AMD case-opt guard as sitting on the helper's dummy declaration
with matching caller-local guards, and note in
s_compute_hypoelastic_interface_energy that the verysmall gate is the
deliberate ruling that replaced HLL's former G > 1000 stability floor and
retired its TODO.
---
 docs/documentation/gpuParallelization.md | 13 +++--
 src/simulation/m_riemann_solver_hllc.fpp | 22 ---------
 src/simulation/m_riemann_solver_lf.fpp   | 60 +++---------------------
 src/simulation/m_riemann_state.fpp       |  4 +-
 4 files changed, 18 insertions(+), 81 deletions(-)

diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md
index 4830b480f0..3023ac7880 100644
--- a/docs/documentation/gpuParallelization.md
+++ b/docs/documentation/gpuParallelization.md
@@ -635,9 +635,11 @@ end subroutine s_accumulate_mixture_properties
 
 **Key idioms.**
 
-- Always ``parallelism='[seq]'`` and `cray_inline=True` for these helpers. The Cray
-  compiler does not inline cross-file `routine` calls without `cray_inline`; without
-  it, performance collapses silently on that backend.
+- Always ``parallelism='[seq]'`` for these helpers. `cray_inline=True` is required
+  when the helper is called from other modules — the Cray compiler does not inline
+  cross-file `routine` calls without it, and performance collapses silently on that
+  backend. Helpers called only from their own module (e.g., the shear/bulk stress
+  tensor pair in `m_riemann_state.fpp`) do not need it.
 - `function_name=` is required when `cray_inline=True` (the Cray inline directive
   needs the explicit name).
 - The `GPU_ROUTINE` directive lives in the *definition*, not the call site. The
@@ -654,8 +656,9 @@ arrays that are sized by runtime parameters at compile time must be declared wit
 explicit constant bound. Use an explicit `n` argument (e.g., `integer, intent(in) ::
 nf`) and dimension helpers as `dimension(nf)` rather than `dimension(num_fluids)`.
 See `s_compute_interface_reynolds` in `src/simulation/m_riemann_state.fpp` for the
-`#:if not MFC_CASE_OPTIMIZATION and USING_AMD` guard pattern where the caller-side
-dimensioning differs.
+`#:if not MFC_CASE_OPTIMIZATION and USING_AMD` guard pattern: the guard sits on the
+dummy-argument declaration in the helper's definition, with matching guards on the
+callers' own local declarations so the actual and dummy bounds agree.
 
 **Declare scoping.** The `GPU_ROUTINE` directive must appear in the source file
 that defines the routine. Helpers added to `m_riemann_state.fpp` are automatically
diff --git a/src/simulation/m_riemann_solver_hllc.fpp b/src/simulation/m_riemann_solver_hllc.fpp
index 55dea991ea..da59cbf1a9 100644
--- a/src/simulation/m_riemann_solver_hllc.fpp
+++ b/src/simulation/m_riemann_solver_hllc.fpp
@@ -442,17 +442,6 @@ contains
 
                                 flux_src_rsx_vf(${SF('')}$, eqn_idx%adv%beg) = vel_src_rsx_vf(${SF('')}$, dir_idx(1))
 
-                                ! HYPOELASTIC STRESS EVOLUTION FLUX.
-                                if (hypoelasticity) then
-                                    $:GPU_LOOP(parallelism='[seq]')
-                                    do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1
-                                        flux_rsx_vf(${SF('')}$, &
-                                                    & eqn_idx%stress%beg - 1 + i) = xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) &
-                                                    & - rho_L*vel_L(dir_idx(1))*tau_e_L(i)) + xi_P*(s_S/(s_R - s_S)) &
-                                                    & *(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(dir_idx(1))*tau_e_R(i))
-                                    end do
-                                end if
-
                                 ! Hyperelastic reference map flux for material deformation tracking
                                 if (hyperelasticity) then
                                     $:GPU_LOOP(parallelism='[seq]')
@@ -1404,17 +1393,6 @@ contains
                                     flux_rsx_vf(${SF('')}$, eqn_idx%E) = flux_rsx_vf(${SF('')}$, eqn_idx%E) + flux_ene_e
                                 end if
 
-                                ! HYPOELASTIC STRESS EVOLUTION FLUX.
-                                if (hypoelasticity) then
-                                    $:GPU_LOOP(parallelism='[seq]')
-                                    do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1
-                                        flux_rsx_vf(${SF('')}$, &
-                                                    & eqn_idx%stress%beg - 1 + i) = xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) &
-                                                    & - rho_L*vel_L(dir_idx(1))*tau_e_L(i)) + xi_P*(s_S/(s_R - s_S)) &
-                                                    & *(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(dir_idx(1))*tau_e_R(i))
-                                    end do
-                                end if
-
                                 ! VOLUME FRACTION FLUX.
                                 $:GPU_LOOP(parallelism='[seq]')
                                 do i = eqn_idx%adv%beg, eqn_idx%adv%end
diff --git a/src/simulation/m_riemann_solver_lf.fpp b/src/simulation/m_riemann_solver_lf.fpp
index ce2d6cd237..b86367b9d3 100644
--- a/src/simulation/m_riemann_solver_lf.fpp
+++ b/src/simulation/m_riemann_solver_lf.fpp
@@ -35,7 +35,6 @@ contains
 
         ! Intercell fluxes
         type(scalar_field), dimension(sys_size), intent(inout) :: flux_vf, flux_src_vf, flux_gsrc_vf
-        real(wp)                                               :: flux_tau_L, flux_tau_R
         integer, intent(in)                                    :: norm_dir
         type(int_bounds_info), intent(in)                      :: ix, iy, iz
 
@@ -73,9 +72,7 @@ contains
         real(wp)                  :: pi_inf_L, pi_inf_R
         real(wp)                  :: qv_L, qv_R
         real(wp)                  :: c_L, c_R
-        real(wp), dimension(6)    :: tau_e_L, tau_e_R
         real(wp), dimension(2)    :: Re_L, Re_R
-        real(wp), dimension(3)    :: xi_field_L, xi_field_R
         real(wp)                  :: rho_avg
         real(wp)                  :: H_avg
         real(wp)                  :: gamma_avg
@@ -111,14 +108,13 @@ contains
             #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs)
             if (norm_dir == ${NORM_DIR}$) then
                 $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, &
-                                    & tau_e_L, tau_e_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, &
-                                    & xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, &
-                                    & h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_grad_L, &
-                                    & vel_grad_R, idx_right_phys, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, &
-                                    & Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, c_avg, pres_L, pres_R, rho_L, rho_R, &
-                                    & gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, c_R, E_L, E_R, H_L, H_R, ptilde_L, &
-                                    & ptilde_R, s_M, s_P, xi_M, xi_P, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, Cp_L, Cp_R, Cv_L, &
-                                    & Cv_R, R_gas_L, R_gas_R, MW_L, MW_R, T_L, T_R, Y_L, Y_R]')
+                                    & Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, Cp_iL, Cp_iR, Xs_L, Xs_R, &
+                                    & Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, &
+                                    & B2, b4, cm, pcorr, zcoef, vel_grad_L, vel_grad_R, idx_right_phys, vel_L_rms, vel_R_rms, &
+                                    & vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, &
+                                    & c_avg, pres_L, pres_R, rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, &
+                                    & c_R, E_L, E_R, H_L, H_R, ptilde_L, ptilde_R, s_M, s_P, xi_M, xi_P, Cp_avg, Cv_avg, T_avg, &
+                                    & eps, c_sum_Yi_Phi, Cp_L, Cp_R, Cv_L, Cv_R, R_gas_L, R_gas_R, MW_L, MW_R, T_L, T_R, Y_L, Y_R]')
                 do l = ${Z_BND}$%beg, ${Z_BND}$%end
                     do k = ${Y_BND}$%beg, ${Y_BND}$%end
                         do j = ${X_BND}$%beg, ${X_BND}$%end
@@ -379,16 +375,6 @@ contains
                                                 & + s_M*s_P*(rho_L*vel_L(dir_idx(i)) - rho_R*vel_R(dir_idx(i))))/(s_M - s_P) &
                                                 & + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i)))
                                 end do
-                            else if (hypoelasticity) then
-                                $:GPU_LOOP(parallelism='[seq]')
-                                do i = 1, num_vels
-                                    flux_rsx_vf(${SF('')}$, &
-                                                & eqn_idx%cont%end + dir_idx(i)) = (s_M*(rho_R*vel_R(dir_idx(1))*vel_R(dir_idx(i)) &
-                                                & + dir_flg(dir_idx(i))*pres_R - tau_e_R(dir_idx_tau(i))) &
-                                                & - s_P*(rho_L*vel_L(dir_idx(1))*vel_L(dir_idx(i)) + dir_flg(dir_idx(i))*pres_L &
-                                                & - tau_e_L(dir_idx_tau(i))) + s_M*s_P*(rho_L*vel_L(dir_idx(i)) &
-                                                & - rho_R*vel_R(dir_idx(i))))/(s_M - s_P)
-                                end do
                             else
                                 $:GPU_LOOP(parallelism='[seq]')
                                 do i = 1, num_vels
@@ -422,17 +408,6 @@ contains
                                             & eqn_idx%E) = (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) - s_P*vel_L(dir_idx(1) &
                                             & )*(E_L + pres_L - ptilde_L) + s_M*s_P*(E_L - E_R))/(s_M - s_P) + (s_M/s_L)*(s_P/s_R) &
                                             & *pcorr*(vel_R_rms - vel_L_rms)/2._wp
-                            else if (hypoelasticity) then
-                                flux_tau_L = 0._wp; flux_tau_R = 0._wp
-                                $:GPU_LOOP(parallelism='[seq]')
-                                do i = 1, num_dims
-                                    flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i))
-                                    flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i))
-                                end do
-                                flux_rsx_vf(${SF('')}$, &
-                                            & eqn_idx%E) = (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) &
-                                            & - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) + s_M*s_P*(E_L - E_R))/(s_M &
-                                            & - s_P)
                             else
                                 flux_rsx_vf(${SF('')}$, &
                                             & eqn_idx%E) = (s_M*vel_R(dir_idx(1))*(E_R + pres_R) - s_P*vel_L(dir_idx(1))*(E_L &
@@ -440,16 +415,6 @@ contains
                                             & - vel_L_rms)/2._wp
                             end if
 
-                            ! Elastic Stresses
-                            if (hypoelasticity) then
-                                do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1  ! TODO: this indexing may be slow
-                                    flux_rsx_vf(${SF('')}$, &
-                                                & eqn_idx%stress%beg - 1 + i) = (s_M*(rho_R*vel_R(dir_idx(1))*tau_e_R(i)) &
-                                                & - s_P*(rho_L*vel_L(dir_idx(1))*tau_e_L(i)) + s_M*s_P*(rho_L*tau_e_L(i) &
-                                                & - rho_R*tau_e_R(i)))/(s_M - s_P)
-                                end do
-                            end if
-
                             ! Advection flux and source: interface velocity for volume fraction transport
                             $:GPU_LOOP(parallelism='[seq]')
                             do i = eqn_idx%adv%beg, eqn_idx%adv%end
@@ -521,17 +486,6 @@ contains
                                         flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i)
                                     end do
                                 end if
-
-                                if (cyl_coord .and. hypoelasticity) then
-                                    ! += tau_sigmasigma using HLL
-                                    flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%cont%end + 2) = flux_gsrc_rsx_vf(${SF('')}$, &
-                                                     & eqn_idx%cont%end + 2) + (s_M*tau_e_R(4) - s_P*tau_e_L(4))/(s_M - s_P)
-
-                                    $:GPU_LOOP(parallelism='[seq]')
-                                    do i = eqn_idx%stress%beg, eqn_idx%stress%end
-                                        flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i)
-                                    end do
-                                end if
                             #:endif
                         end do
                     end do
diff --git a/src/simulation/m_riemann_state.fpp b/src/simulation/m_riemann_state.fpp
index 38e15ce995..9efd3972a2 100644
--- a/src/simulation/m_riemann_state.fpp
+++ b/src/simulation/m_riemann_state.fpp
@@ -1076,7 +1076,9 @@ contains
     !> Accumulate the hypoelastic stress contribution to the energies of the left and right Riemann states: mix the shear modulus
     !! over the fluids, scale it by the continuum damage state when damage is modeled, and add the elastic energy of each stress
     !! component (doubled for the shear components) when both mixture moduli are non-negligible. The elastic shear stresses are
-    !! loaded from the state buffers by the caller, which reuses them for the stress fluxes and elastic wave speeds.
+    !! loaded from the state buffers by the caller, which reuses them for the stress fluxes and elastic wave speeds. The G >
+    !! verysmall gate is a deliberate maintainer ruling that replaces HLL's former hard-coded G > 1000 stability floor, retiring its
+    !! "TODO take out if statement if stable without".
     subroutine s_compute_hypoelastic_interface_energy(nf, alpha_L, alpha_R, damage_L, damage_R, tau_e_L, tau_e_R, G_L, G_R, E_L, &
         & E_R)
 

From a6d9d9d1b42a541ac033026f48e828b6718d2a9f Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Tue, 16 Jun 2026 09:00:34 -0400
Subject: [PATCH 7/7] HLL: add s_M/s_P/xi_M/xi_P to private list (fix CCE-19
 defaultmap spill on Cray GPU-OMP)

---
 src/simulation/m_riemann_solver_hll.fpp | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/simulation/m_riemann_solver_hll.fpp b/src/simulation/m_riemann_solver_hll.fpp
index f46aa94e1c..8cc4ade407 100644
--- a/src/simulation/m_riemann_solver_hll.fpp
+++ b/src/simulation/m_riemann_solver_hll.fpp
@@ -115,15 +115,15 @@ contains
             #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs)
             if (norm_dir == ${NORM_DIR}$) then
                 $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, &
-                                    & tau_e_L, tau_e_R, Re_L, Re_R, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, &
-                                    & Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, &
-                                    & pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp, rho_L, rho_R, &
-                                    & pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, T_L, T_R, &
-                                    & Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, gamma_L, &
-                                    & gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, G_L, G_R, damage_L, damage_R, &
-                                    & rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, vel_avg_rms, &
-                                    & Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, flux_tau_L, flux_tau_R]', &
-                                    & copyin='[norm_dir]', firstprivate='[Re_size_loc1, Re_size_loc2]')
+                                    & tau_e_L, tau_e_R, Re_L, Re_R, s_L, s_R, s_M, s_P, s_S, xi_M, xi_P, Ys_L, Ys_R, xi_field_L, &
+                                    & xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, &
+                                    & h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp, &
+                                    & rho_L, rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, &
+                                    & T_L, T_R, Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, &
+                                    & gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, G_L, G_R, damage_L, &
+                                    & damage_R, rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, &
+                                    & vel_avg_rms, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, flux_tau_L, &
+                                    & flux_tau_R]', copyin='[norm_dir]', firstprivate='[Re_size_loc1, Re_size_loc2]')
                 do l = ${Z_BND}$%beg, ${Z_BND}$%end
                     do k = ${Y_BND}$%beg, ${Y_BND}$%end
                         do j = ${X_BND}$%beg, ${X_BND}$%end