diff --git a/benchmarks/5eq_rk3_weno3_hll/case.py b/benchmarks/5eq_rk3_weno3_hll/case.py new file mode 100644 index 0000000000..1c7558edb7 --- /dev/null +++ b/benchmarks/5eq_rk3_weno3_hll/case.py @@ -0,0 +1,279 @@ +#!/usr/bin/env python3 +# Benchmark model_equations_2_time_stepper_3_weno_order_3_riemann_solver_hll +# Additional Benchmarked Features +# - model_equations : 2 +# - time_stepper : 3 +# - weno_order : 3 +# - riemann_solver : hll + +import argparse +import json +import math + +parser = argparse.ArgumentParser(prog="Benchmarking Case 1", description="This MFC case was created for the purposes of benchmarking MFC.", formatter_class=argparse.ArgumentDefaultsHelpFormatter) + +parser.add_argument("--mfc", type=json.loads, default="{}", metavar="DICT", help="MFC's toolchain's internal state.") +parser.add_argument("--gbpp", type=int, metavar="MEM", default=16, help="Adjusts the problem size per rank to fit into [MEM] GB of GPU memory per GPU.") +parser.add_argument("--steps", type=int, default=None, help="Override t_step_stop/t_step_save.") + +ARGS = vars(parser.parse_args()) +DICT = ARGS["mfc"] + +size = 1 if DICT["gpu"] else 0 + +ppg = 8000000 / 16.0 +procs = DICT["nodes"] * DICT["tasks_per_node"] +ncells = math.floor(ppg * procs * ARGS["gbpp"]) +s = math.floor((ncells / 2.0) ** (1 / 3)) +Nx, Ny, Nz = 2 * s, s, s + +# athmospheric pressure - Pa (used as reference value) +patm = 101325 + +# Initial Droplet Diameter / Reference length - m +D0 = 1.0e-3 + +# cavity to droplet ratio +CtD = 0.06 + +# cavity relative eccentricity (distance between radii) +ecc = 0.564 + +# initial shock distance from the y axis. Note that the droplet center is located at y = 0. Thus, the distance from the shock to +# the droplet is about D0/8 +ISD = 5.0 / 8 * D0 + +# pre-shock properties - AIR + +# pressure - Pa +p0a = patm + +# density - kg/m3 +rho0a = 1.204 + +# gamma +gama = 1.40 + +# pi infinity - Pa +pia = 0 + +# speed of sound - M/s +c_a = math.sqrt(gama * (p0a + pia) / rho0a) + +# Droplet - WATER + +# surface tension - N / m +st = 0.00e0 + +# Delta Pressure - Pa +DP = -st * 4 / D0 + +# initial pressure inside the droplet - Pa +p0w = p0a - DP + +# density - kg/m3 +rho0w = 1000 + +# gama +gamw = 6.12 + +# pi infty - Pa +piw = 3.43e08 + +# speed of sound - m/s +c_w = math.sqrt(gamw * (p0w + piw) / rho0w) + +# Shock Mach number of interest. Note that the post-shock properties can be defined in terms of either +# Min or psOp0a. Just comment/uncomment appropriately +Min = 2.4 + +# Pos to pre shock ratios - AIR + +# pressure +psOp0a = (Min**2 - 1) * 2 * gama / (gama + 1) + 1 +# psOp0a = 4.5 + +# density +rhosOrho0a = (1 + (gama + 1) / (gama - 1) * psOp0a) / ((gama + 1) / (gama - 1) + psOp0a) + +# Mach number of the shocked region - just a checker, as it must return "Min" +Ms = math.sqrt((gama + 1.0) / (2.0 * gama) * (psOp0a - 1.0) * (p0a / (p0a + pia)) + 1.0) + +# shock speed of sound - m/s +ss = Ms * c_a + +# post-shock - AIR + +# pressure - Pa +ps = psOp0a * p0a + +# density - kg / m3 +rhos = rhosOrho0a * rho0a + +# post shock speed of sound - m/s +c_s = math.sqrt(gama * (ps + pia) / rhos) + +# velocity at the post shock - m/s +vel = c_a / gama * (psOp0a - 1.0) * p0a / (p0a + pia) / Ms + +# Domain boundaries - m + +# x direction +xb = -8.4707 * D0 +xe = 9.6226 * D0 + +# xb = -10 * D0 +# xe = 10 * D0 + +# y direction +yb = 0 * D0 +ye = 10 * D0 + +# y direction +zb = 0 * D0 +ze = 10 * D0 + +# Stretching factor, to make sure the domaing is sufficiently large after the mesh stretch +StF = 4.0 + +# grid delta x if mesh were uniform in x direction - m. Note that I do not need a measure for dy +dx = (xe - xb) / Nx + +# I calculate tend twice; first is an estimate, second is +# the actual value used. This is because I am getting errors in the +# post process part every time I approximate the actual Nt by an integer +# number (think of a smarter way). + +# dimensionless time +ttilde = 1.92 + +# auxiliary simulation physical time - s. This is not YET the total simulation time, as it will be corrected so as to avoid +# mismatches in simulation and post_process parts. Note that I wrote it this way so I have better control over the # of autosaves +tendA = ttilde * D0 / vel + +cfl = 0.1 + +# time-step - s +dt = dx * cfl / ss + +# Save Frequency. Note that the number of autosaves will be SF + 1, as th IC (0.dat) is also saved +SF = 400 + +# making Nt divisible by SF +# 1 - ensure NtA goes slightly beyond tendA +NtA = int(tendA // dt + 1) + +# Array of saves. It is the same as Nt/Sf = t_step_save +AS = int(NtA // SF + 1) + +# Nt = total number of steps. Note that Nt >= NtA (so at least tendA is completely simulated) +Nt = AS * SF + +# total simulation time - s. Note that tend >= tendA +tend = Nt * dt + +# Configuring case dictionary +print( + json.dumps( + { + # Logistics + "run_time_info": "F", + # Computational Domain Parameters + "x_domain%beg": xb, + "x_domain%end": xe, + "y_domain%beg": yb, + "y_domain%end": ye, + "z_domain%beg": zb, + "z_domain%end": ze, + "m": Nx, + "n": Ny, + "p": Nz, + "cyl_coord": "F", + "dt": dt, + "t_step_start": 0, + "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), + "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), + # Simulation Algorithm Parameters + "num_patches": 3, + "model_eqns": 2, + "alt_soundspeed": "F", + "num_fluids": 2, + "mpp_lim": "T", + "mixture_err": "T", + "time_stepper": 3, + "weno_order": 3, + "weno_eps": 1.0e-16, + "weno_Re_flux": "F", + "weno_avg": "F", + "mapped_weno": "T", + "riemann_solver": "hll", + "wave_speeds": 1, + "avg_state": 2, + "bc_x%beg": -6, + "bc_x%end": -6, + "bc_y%beg": -2, + "bc_y%end": -3, + "bc_z%beg": -2, + "bc_z%end": -3, + # Formatted Database Files Structure Parameters + "format": 1, + "precision": 2, + "prim_vars_wrt": "T", + "parallel_io": "F", + # I will use 1 for WATER properties, and 2 for AIR properties + # Patch 1: Background (AIR - 2) + "patch_icpp(1)%geometry": 9, + "patch_icpp(1)%x_centroid": (xb + xe) / 2 * StF, + "patch_icpp(1)%y_centroid": (yb + ye) / 2 * StF, + "patch_icpp(1)%z_centroid": (yb + ye) / 2 * StF, + "patch_icpp(1)%length_x": (xe - xb) * StF, + "patch_icpp(1)%length_y": (ye - yb) * StF, + "patch_icpp(1)%length_z": (ze - zb) * StF, + "patch_icpp(1)%vel(1)": 0.0e00, + "patch_icpp(1)%vel(2)": 0.0e00, + "patch_icpp(1)%vel(3)": 0.0e00, + "patch_icpp(1)%pres": p0a, + "patch_icpp(1)%alpha_rho(1)": 0.0e00, + "patch_icpp(1)%alpha_rho(2)": rho0a, + "patch_icpp(1)%alpha(1)": 0.0e00, + "patch_icpp(1)%alpha(2)": 1.0e00, + # Patch 2: Shocked state (AIR - 2) + "patch_icpp(2)%geometry": 9, + "patch_icpp(2)%alter_patch(1)": "T", + "patch_icpp(2)%x_centroid": -ISD - (xe - xb) / 2 * StF, + "patch_icpp(2)%y_centroid": (yb + ye) / 2 * StF, + "patch_icpp(2)%z_centroid": (zb + ze) / 2 * StF, + "patch_icpp(2)%length_x": (xe - xb) * StF, + "patch_icpp(2)%length_y": (ye - yb) * StF, + "patch_icpp(2)%length_z": (ze - zb) * StF, + "patch_icpp(2)%vel(1)": vel, + "patch_icpp(2)%vel(2)": 0.0e00, + "patch_icpp(2)%vel(3)": 0.0e00, + "patch_icpp(2)%pres": ps, + "patch_icpp(2)%alpha_rho(1)": 0.0e00, + "patch_icpp(2)%alpha_rho(2)": rhos, + "patch_icpp(2)%alpha(1)": 0.0e00, + "patch_icpp(2)%alpha(2)": 1.0e00, + # Patch 3: Droplet (WATER - 1) + "patch_icpp(3)%geometry": 8, + "patch_icpp(3)%x_centroid": 0.0e00, + "patch_icpp(3)%y_centroid": 0.0e00, + "patch_icpp(3)%z_centroid": 0.0e00, + "patch_icpp(3)%radius": D0 / 2, + "patch_icpp(3)%alter_patch(1)": "T", + "patch_icpp(3)%vel(1)": 0.0e00, + "patch_icpp(3)%vel(2)": 0.0e00, + "patch_icpp(3)%vel(3)": 0.0e00, + "patch_icpp(3)%pres": p0w, + "patch_icpp(3)%alpha_rho(1)": rho0w, + "patch_icpp(3)%alpha_rho(2)": 0.0e00, + "patch_icpp(3)%alpha(1)": 1.0e00, + "patch_icpp(3)%alpha(2)": 0.0e00, + # Fluids Physical Parameters + "fluid_pp(1)%gamma": 1.0e00 / (gamw - 1), + "fluid_pp(1)%pi_inf": gamw * piw / (gamw - 1), + "fluid_pp(2)%gamma": 1.0e00 / (gama - 1), + "fluid_pp(2)%pi_inf": gama * pia / (gama - 1), + } + ) +) diff --git a/benchmarks/5eq_rk3_weno3_lf/case.py b/benchmarks/5eq_rk3_weno3_lf/case.py new file mode 100644 index 0000000000..55662ea40d --- /dev/null +++ b/benchmarks/5eq_rk3_weno3_lf/case.py @@ -0,0 +1,279 @@ +#!/usr/bin/env python3 +# Benchmark model_equations_2_time_stepper_3_weno_order_3_riemann_solver_lax_friedrichs +# Additional Benchmarked Features +# - model_equations : 2 +# - time_stepper : 3 +# - weno_order : 3 +# - riemann_solver : lax_friedrichs + +import argparse +import json +import math + +parser = argparse.ArgumentParser(prog="Benchmarking Case 1", description="This MFC case was created for the purposes of benchmarking MFC.", formatter_class=argparse.ArgumentDefaultsHelpFormatter) + +parser.add_argument("--mfc", type=json.loads, default="{}", metavar="DICT", help="MFC's toolchain's internal state.") +parser.add_argument("--gbpp", type=int, metavar="MEM", default=16, help="Adjusts the problem size per rank to fit into [MEM] GB of GPU memory per GPU.") +parser.add_argument("--steps", type=int, default=None, help="Override t_step_stop/t_step_save.") + +ARGS = vars(parser.parse_args()) +DICT = ARGS["mfc"] + +size = 1 if DICT["gpu"] else 0 + +ppg = 8000000 / 16.0 +procs = DICT["nodes"] * DICT["tasks_per_node"] +ncells = math.floor(ppg * procs * ARGS["gbpp"]) +s = math.floor((ncells / 2.0) ** (1 / 3)) +Nx, Ny, Nz = 2 * s, s, s + +# athmospheric pressure - Pa (used as reference value) +patm = 101325 + +# Initial Droplet Diameter / Reference length - m +D0 = 1.0e-3 + +# cavity to droplet ratio +CtD = 0.06 + +# cavity relative eccentricity (distance between radii) +ecc = 0.564 + +# initial shock distance from the y axis. Note that the droplet center is located at y = 0. Thus, the distance from the shock to +# the droplet is about D0/8 +ISD = 5.0 / 8 * D0 + +# pre-shock properties - AIR + +# pressure - Pa +p0a = patm + +# density - kg/m3 +rho0a = 1.204 + +# gamma +gama = 1.40 + +# pi infinity - Pa +pia = 0 + +# speed of sound - M/s +c_a = math.sqrt(gama * (p0a + pia) / rho0a) + +# Droplet - WATER + +# surface tension - N / m +st = 0.00e0 + +# Delta Pressure - Pa +DP = -st * 4 / D0 + +# initial pressure inside the droplet - Pa +p0w = p0a - DP + +# density - kg/m3 +rho0w = 1000 + +# gama +gamw = 6.12 + +# pi infty - Pa +piw = 3.43e08 + +# speed of sound - m/s +c_w = math.sqrt(gamw * (p0w + piw) / rho0w) + +# Shock Mach number of interest. Note that the post-shock properties can be defined in terms of either +# Min or psOp0a. Just comment/uncomment appropriately +Min = 2.4 + +# Pos to pre shock ratios - AIR + +# pressure +psOp0a = (Min**2 - 1) * 2 * gama / (gama + 1) + 1 +# psOp0a = 4.5 + +# density +rhosOrho0a = (1 + (gama + 1) / (gama - 1) * psOp0a) / ((gama + 1) / (gama - 1) + psOp0a) + +# Mach number of the shocked region - just a checker, as it must return "Min" +Ms = math.sqrt((gama + 1.0) / (2.0 * gama) * (psOp0a - 1.0) * (p0a / (p0a + pia)) + 1.0) + +# shock speed of sound - m/s +ss = Ms * c_a + +# post-shock - AIR + +# pressure - Pa +ps = psOp0a * p0a + +# density - kg / m3 +rhos = rhosOrho0a * rho0a + +# post shock speed of sound - m/s +c_s = math.sqrt(gama * (ps + pia) / rhos) + +# velocity at the post shock - m/s +vel = c_a / gama * (psOp0a - 1.0) * p0a / (p0a + pia) / Ms + +# Domain boundaries - m + +# x direction +xb = -8.4707 * D0 +xe = 9.6226 * D0 + +# xb = -10 * D0 +# xe = 10 * D0 + +# y direction +yb = 0 * D0 +ye = 10 * D0 + +# y direction +zb = 0 * D0 +ze = 10 * D0 + +# Stretching factor, to make sure the domaing is sufficiently large after the mesh stretch +StF = 4.0 + +# grid delta x if mesh were uniform in x direction - m. Note that I do not need a measure for dy +dx = (xe - xb) / Nx + +# I calculate tend twice; first is an estimate, second is +# the actual value used. This is because I am getting errors in the +# post process part every time I approximate the actual Nt by an integer +# number (think of a smarter way). + +# dimensionless time +ttilde = 1.92 + +# auxiliary simulation physical time - s. This is not YET the total simulation time, as it will be corrected so as to avoid +# mismatches in simulation and post_process parts. Note that I wrote it this way so I have better control over the # of autosaves +tendA = ttilde * D0 / vel + +cfl = 0.1 + +# time-step - s +dt = dx * cfl / ss + +# Save Frequency. Note that the number of autosaves will be SF + 1, as th IC (0.dat) is also saved +SF = 400 + +# making Nt divisible by SF +# 1 - ensure NtA goes slightly beyond tendA +NtA = int(tendA // dt + 1) + +# Array of saves. It is the same as Nt/Sf = t_step_save +AS = int(NtA // SF + 1) + +# Nt = total number of steps. Note that Nt >= NtA (so at least tendA is completely simulated) +Nt = AS * SF + +# total simulation time - s. Note that tend >= tendA +tend = Nt * dt + +# Configuring case dictionary +print( + json.dumps( + { + # Logistics + "run_time_info": "F", + # Computational Domain Parameters + "x_domain%beg": xb, + "x_domain%end": xe, + "y_domain%beg": yb, + "y_domain%end": ye, + "z_domain%beg": zb, + "z_domain%end": ze, + "m": Nx, + "n": Ny, + "p": Nz, + "cyl_coord": "F", + "dt": dt, + "t_step_start": 0, + "t_step_stop": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), + "t_step_save": ARGS["steps"] if ARGS["steps"] is not None else int(2 * (5 * size + 5)), + # Simulation Algorithm Parameters + "num_patches": 3, + "model_eqns": 2, + "alt_soundspeed": "F", + "num_fluids": 2, + "mpp_lim": "T", + "mixture_err": "T", + "time_stepper": 3, + "weno_order": 3, + "weno_eps": 1.0e-16, + "weno_Re_flux": "F", + "weno_avg": "F", + "mapped_weno": "T", + "riemann_solver": "lax_friedrichs", + "wave_speeds": 1, + "avg_state": 2, + "bc_x%beg": -6, + "bc_x%end": -6, + "bc_y%beg": -2, + "bc_y%end": -3, + "bc_z%beg": -2, + "bc_z%end": -3, + # Formatted Database Files Structure Parameters + "format": 1, + "precision": 2, + "prim_vars_wrt": "T", + "parallel_io": "F", + # I will use 1 for WATER properties, and 2 for AIR properties + # Patch 1: Background (AIR - 2) + "patch_icpp(1)%geometry": 9, + "patch_icpp(1)%x_centroid": (xb + xe) / 2 * StF, + "patch_icpp(1)%y_centroid": (yb + ye) / 2 * StF, + "patch_icpp(1)%z_centroid": (yb + ye) / 2 * StF, + "patch_icpp(1)%length_x": (xe - xb) * StF, + "patch_icpp(1)%length_y": (ye - yb) * StF, + "patch_icpp(1)%length_z": (ze - zb) * StF, + "patch_icpp(1)%vel(1)": 0.0e00, + "patch_icpp(1)%vel(2)": 0.0e00, + "patch_icpp(1)%vel(3)": 0.0e00, + "patch_icpp(1)%pres": p0a, + "patch_icpp(1)%alpha_rho(1)": 0.0e00, + "patch_icpp(1)%alpha_rho(2)": rho0a, + "patch_icpp(1)%alpha(1)": 0.0e00, + "patch_icpp(1)%alpha(2)": 1.0e00, + # Patch 2: Shocked state (AIR - 2) + "patch_icpp(2)%geometry": 9, + "patch_icpp(2)%alter_patch(1)": "T", + "patch_icpp(2)%x_centroid": -ISD - (xe - xb) / 2 * StF, + "patch_icpp(2)%y_centroid": (yb + ye) / 2 * StF, + "patch_icpp(2)%z_centroid": (zb + ze) / 2 * StF, + "patch_icpp(2)%length_x": (xe - xb) * StF, + "patch_icpp(2)%length_y": (ye - yb) * StF, + "patch_icpp(2)%length_z": (ze - zb) * StF, + "patch_icpp(2)%vel(1)": vel, + "patch_icpp(2)%vel(2)": 0.0e00, + "patch_icpp(2)%vel(3)": 0.0e00, + "patch_icpp(2)%pres": ps, + "patch_icpp(2)%alpha_rho(1)": 0.0e00, + "patch_icpp(2)%alpha_rho(2)": rhos, + "patch_icpp(2)%alpha(1)": 0.0e00, + "patch_icpp(2)%alpha(2)": 1.0e00, + # Patch 3: Droplet (WATER - 1) + "patch_icpp(3)%geometry": 8, + "patch_icpp(3)%x_centroid": 0.0e00, + "patch_icpp(3)%y_centroid": 0.0e00, + "patch_icpp(3)%z_centroid": 0.0e00, + "patch_icpp(3)%radius": D0 / 2, + "patch_icpp(3)%alter_patch(1)": "T", + "patch_icpp(3)%vel(1)": 0.0e00, + "patch_icpp(3)%vel(2)": 0.0e00, + "patch_icpp(3)%vel(3)": 0.0e00, + "patch_icpp(3)%pres": p0w, + "patch_icpp(3)%alpha_rho(1)": rho0w, + "patch_icpp(3)%alpha_rho(2)": 0.0e00, + "patch_icpp(3)%alpha(1)": 1.0e00, + "patch_icpp(3)%alpha(2)": 0.0e00, + # Fluids Physical Parameters + "fluid_pp(1)%gamma": 1.0e00 / (gamw - 1), + "fluid_pp(1)%pi_inf": gamw * piw / (gamw - 1), + "fluid_pp(2)%gamma": 1.0e00 / (gama - 1), + "fluid_pp(2)%pi_inf": gama * pia / (gama - 1), + } + ) +) diff --git a/cmake/MFCTargets.cmake b/cmake/MFCTargets.cmake index 5242fe9fd8..3e848aca4f 100644 --- a/cmake/MFCTargets.cmake +++ b/cmake/MFCTargets.cmake @@ -74,7 +74,6 @@ exit 0 foreach (a_target ${IPO_TARGETS}) set_target_properties(${a_target} PROPERTIES Fortran_PREPROCESS ON) - message(STATUS ${CMAKE_Fortran_COMPILER_ID}) target_include_directories(${a_target} PRIVATE "${CMAKE_SOURCE_DIR}/src/common" @@ -161,7 +160,6 @@ exit 0 if (NOT TARGET OpenMP::OpenMP_Fortran) message(FATAL_ERROR "OpenMP + Fortran is unsupported.") endif() - set(ENV{OMP_TARGET_OFFLOAD} [MANDATORY]) # target_link_libraries(${a_target} PRIVATE OpenMP::OpenMP_Fortran) target_compile_definitions(${a_target} PRIVATE MFC_OpenMP MFC_GPU) @@ -209,11 +207,11 @@ exit 0 # GH-200 Unified Memory Support if (MFC_Unified) - target_compile_options(${ARGS_TARGET} + target_compile_options(${a_target} PRIVATE -gpu=mem:unified:managedalloc -cuda ) # "This option must appear in both the compile and link lines" -- NVHPC Docs - target_link_options(${ARGS_TARGET} + target_link_options(${a_target} PRIVATE -gpu=mem:unified:managedalloc -cuda ) endif() @@ -226,7 +224,7 @@ exit 0 elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") # Frontier Unified Memory Support if (MFC_Unified) - target_compile_options(${ARGS_TARGET} + target_compile_options(${a_target} PRIVATE -DFRONTIER_UNIFIED) endif() @@ -239,7 +237,7 @@ exit 0 elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") if (MFC_Unified) - target_compile_options(${ARGS_TARGET} + target_compile_options(${a_target} PRIVATE -DFRONTIER_UNIFIED) endif() diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md index 489ad42b12..3023ac7880 100644 --- a/docs/documentation/gpuParallelization.md +++ b/docs/documentation/gpuParallelization.md @@ -607,6 +607,66 @@ Does not do anything for OpenMP currently ------------------------------------------------------------------------------------------ +## Writing GPU_ROUTINE Device Helpers + +`GPU_ROUTINE` marks a subroutine or function as callable from within a GPU kernel +(an OpenACC `routine` or OpenMP `declare target` directive). The standard idiom for +pure, sequential per-thread helpers is: + +```fortran +subroutine s_accumulate_mixture_properties(nf, alpha_rho_K, alpha_K, rho_K, gamma_K, pi_inf_K, qv_K) + + $:GPU_ROUTINE(function_name='s_accumulate_mixture_properties', parallelism='[seq]', cray_inline=True) + + integer, intent(in) :: nf + real(wp), dimension(nf), intent(in) :: alpha_rho_K, alpha_K + real(wp), intent(out) :: rho_K, gamma_K, pi_inf_K, qv_K + ... +end subroutine s_accumulate_mixture_properties +``` + +**When to use it.** Extract a block into a `GPU_ROUTINE` helper when: + +- It does roughly 50–100 or more FLOPs of work per call, enough to amortize the + call overhead on device. Below that threshold, the helper is justified only when + the same block appears verbatim in three or more callers (duplication removal). +- It is called from inside a `GPU_PARALLEL_LOOP` region — ``parallelism='[seq]'`` is + correct for sequential per-thread work; the surrounding loop keeps the parallelism. + +**Key idioms.** + +- Always ``parallelism='[seq]'`` for these helpers. `cray_inline=True` is required + when the helper is called from other modules — the Cray compiler does not inline + cross-file `routine` calls without it, and performance collapses silently on that + backend. Helpers called only from their own module (e.g., the shear/bulk stress + tensor pair in `m_riemann_state.fpp`) do not need it. +- `function_name=` is required when `cray_inline=True` (the Cray inline directive + needs the explicit name). +- The `GPU_ROUTINE` directive lives in the *definition*, not the call site. The + caller needs no annotation beyond being inside a `GPU_PARALLEL_LOOP`. + +**Caller-loads, helper-computes.** Callers do all coordinate-indexed array loads +from the global state arrays (`qL_rs_vf`, `qR_rs_vf`, etc.) before the call; the +helper receives only scalars or small arrays with explicit-shape dimensioning. This +is required because the `SF` indexing lambda used in solver loops is defined locally +inside each solver's `#:for NORM_DIR` block and cannot be referenced from a helper. + +**AMD case-opt compatibility.** Under `--case-optimization` with the AMD backend, +arrays that are sized by runtime parameters at compile time must be declared with an +explicit constant bound. Use an explicit `n` argument (e.g., `integer, intent(in) :: +nf`) and dimension helpers as `dimension(nf)` rather than `dimension(num_fluids)`. +See `s_compute_interface_reynolds` in `src/simulation/m_riemann_state.fpp` for the +`#:if not MFC_CASE_OPTIMIZATION and USING_AMD` guard pattern: the guard sits on the +dummy-argument declaration in the helper's definition, with matching guards on the +callers' own local declarations so the actual and dummy bounds agree. + +**Declare scoping.** The `GPU_ROUTINE` directive must appear in the source file +that defines the routine. Helpers added to `m_riemann_state.fpp` are automatically +in scope for every solver module that `use`s it — no additional declare-target +annotations are needed at call sites. + +------------------------------------------------------------------------------------------ + # Debugging Tools and Tips for GPUs ## Compiler agnostic tools diff --git a/src/simulation/m_riemann_solver_hll.fpp b/src/simulation/m_riemann_solver_hll.fpp index 72094aec7a..8cc4ade407 100644 --- a/src/simulation/m_riemann_solver_hll.fpp +++ b/src/simulation/m_riemann_solver_hll.fpp @@ -75,6 +75,7 @@ contains real(wp) :: c_L, c_R real(wp), dimension(6) :: tau_e_L, tau_e_R real(wp) :: G_L, G_R + real(wp) :: damage_L, damage_R real(wp), dimension(2) :: Re_L, Re_R real(wp), dimension(3) :: xi_field_L, xi_field_R real(wp) :: rho_avg @@ -96,8 +97,8 @@ contains type(riemann_states) :: vdotB, B2 type(riemann_states_vec3) :: b4 !< 4-magnetic field components (spatial: b4x, b4y, b4z) type(riemann_states_vec3) :: cm !< Conservative momentum variables - integer :: i, j, k, l, q !< Generic loop iterators - integer :: Re_size_loc1, Re_size_loc2 !< host copy of Re_size; amdflang reads the declare-target original stale cross-TU + integer :: i, j, k, l !< Generic loop iterators + integer :: Re_size_loc1, Re_size_loc2 !< host copies of Re_size; amdflang reads the declare-target original stale cross-TU ! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, & @@ -113,16 +114,16 @@ contains #:set SV = STENCIL_VAR #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) if (norm_dir == ${NORM_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, q, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, & - & alpha_R, tau_e_L, tau_e_R, Re_L, Re_R, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, & - & Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, & - & pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp, rho_L, rho_R, & - & pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, T_L, T_R, & - & Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, gamma_L, & - & gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, G_L, G_R, rho_avg, H_avg, c_avg, & - & gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, vel_avg_rms, Ms_L, Ms_R, pres_SL, & - & pres_SR, alpha_L_sum, alpha_R_sum, flux_tau_L, flux_tau_R, s_M, s_P, xi_M, xi_P]', & - & copyin='[norm_dir]', firstprivate='[Re_size_loc1, Re_size_loc2]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, & + & tau_e_L, tau_e_R, Re_L, Re_R, s_L, s_R, s_M, s_P, s_S, xi_M, xi_P, Ys_L, Ys_R, xi_field_L, & + & xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, & + & h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp, & + & rho_L, rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, & + & T_L, T_R, Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, & + & gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, G_L, G_R, damage_L, & + & damage_R, rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, & + & vel_avg_rms, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, flux_tau_L, & + & flux_tau_R]', copyin='[norm_dir]', firstprivate='[Re_size_loc1, Re_size_loc2]') do l = ${Z_BND}$%beg, ${Z_BND}$%end do k = ${Y_BND}$%beg, ${Y_BND}$%end do j = ${X_BND}$%beg, ${X_BND}$%end @@ -200,37 +201,12 @@ contains alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) - end do + call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_L, rho_L, gamma_L, pi_inf_L, qv_L) + call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_R, rho_R, gamma_R, pi_inf_R, qv_R) if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - - if (merge(Re_size_loc1, Re_size_loc2, i == 1) > 0) Re_L(i) = 0._wp - if (merge(Re_size_loc1, Re_size_loc2, i == 1) > 0) Re_R(i) = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, merge(Re_size_loc1, Re_size_loc2, i == 1) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i) - end do - - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do + call s_compute_interface_reynolds(alpha_L, Re_L, Re_size_loc1, Re_size_loc2) + call s_compute_interface_reynolds(alpha_R, Re_R, Re_size_loc1, Re_size_loc2) end if if (chemistry) then @@ -326,34 +302,20 @@ contains ! elastic energy update if (hypoelasticity) then - G_L = 0._wp; G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) + do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 + tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i) + tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i) end do + damage_L = 0._wp; damage_R = 0._wp if (cont_damage) then - G_L = G_L*max((1._wp - qL_prim_rsx_vf(${SF('')}$, eqn_idx%damage)), 0._wp) - G_R = G_R*max((1._wp - qR_prim_rsx_vf(${SF('')}$, eqn_idx%damage)), 0._wp) + damage_L = qL_prim_rsx_vf(${SF('')}$, eqn_idx%damage) + damage_R = qR_prim_rsx_vf(${SF('')}$, eqn_idx%damage) end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i) - tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i) - ! Elastic contribution to energy if G large enough TODO take out if statement if stable without - if ((G_L > 1000) .and. (G_R > 1000)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Double for shear stresses - if (any(eqn_idx%stress%beg - 1 + i == shear_indices)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if - end if - end do + call s_compute_hypoelastic_interface_energy(num_fluids, alpha_L, alpha_R, damage_L, damage_R, & + & tau_e_L, tau_e_R, G_L, G_R, E_L, E_R) end if @:compute_average_state() diff --git a/src/simulation/m_riemann_solver_hllc.fpp b/src/simulation/m_riemann_solver_hllc.fpp index 3fb89169ea..da14be138a 100644 --- a/src/simulation/m_riemann_solver_hllc.fpp +++ b/src/simulation/m_riemann_solver_hllc.fpp @@ -47,10 +47,13 @@ contains #:if not MFC_CASE_OPTIMIZATION and USING_AMD real(wp), dimension(3) :: alpha_rho_L, alpha_rho_R real(wp), dimension(3) :: alpha_L, alpha_R + real(wp), dimension(3) :: alpha_lim_L, alpha_lim_R real(wp), dimension(3) :: vel_L, vel_R #:else real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R real(wp), dimension(num_fluids) :: alpha_L, alpha_R + !> Post-limiter volume fractions (alpha_L/R retain the pre-limiter loads used downstream) + real(wp), dimension(num_fluids) :: alpha_lim_L, alpha_lim_R real(wp), dimension(num_dims) :: vel_L, vel_R #:endif @@ -118,8 +121,8 @@ contains real(wp) :: pres_SL, pres_SR, Ms_L, Ms_R real(wp) :: flux_ene_e real(wp) :: zcoef, pcorr !< low Mach number correction - integer :: Re_max, i, j, k, l, q !< Generic loop iterators - integer :: Re_size_loc1, Re_size_loc2 !< host copy of Re_size; amdflang reads the declare-target original stale cross-TU + integer :: i, j, k, l, q !< Generic loop iterators + integer :: Re_size_loc1, Re_size_loc2 !< host copies of Re_size; amdflang reads the declare-target original stale cross-TU ! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, & @@ -128,6 +131,7 @@ contains ! Reshaping inputted data based on dimensional splitting direction call s_initialize_riemann_solver(flux_src_vf, norm_dir) + Re_size_loc1 = Re_size(1); Re_size_loc2 = Re_size(2) #:for NORM_DIR, XYZ, STENCIL_VAR, COORDS, X_BND, Y_BND, Z_BND in & @@ -140,16 +144,17 @@ contains ! 6-EQUATION MODEL WITH HLLC HLLC star-state flux with contact wave speed s_S if (model_eqns == model_eqns_6eq) then ! 6-equation model (model_eqns=3): separate phasic internal energies - $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, q, vel_L, vel_R, Re_L, Re_R, alpha_L, alpha_R, Ys_L, & - & Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, Yi_avg, Phi_avg, h_iL, h_iR, & - & h_avg_2, tau_e_L, tau_e_R, flux_ene_e, xi_field_L, xi_field_R, pcorr, zcoef, rho_L, & - & rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, & - & T_L, T_R, Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, & - & Gamm_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, G_L, G_R, & - & rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, & - & vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, & - & alpha_R_sum, rho_Star, E_Star, p_Star, p_K_Star, vel_K_star, s_L, s_R, s_M, s_P, s_S, & - & xi_M, xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP]', firstprivate='[Re_size_loc1, Re_size_loc2]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, vel_L, vel_R, Re_L, Re_R, alpha_L, alpha_R, & + & alpha_rho_L, alpha_rho_R, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, & + & Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, tau_e_L, tau_e_R, flux_ene_e, xi_field_L, & + & xi_field_R, pcorr, zcoef, rho_L, rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, & + & Cv_avg, T_avg, eps, c_sum_Yi_Phi, T_L, T_R, Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, & + & Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, & + & qv_R, qv_avg, c_L, c_R, G_L, G_R, rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, & + & vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, & + & alpha_L_sum, alpha_R_sum, rho_Star, E_Star, p_Star, p_K_Star, vel_K_star, s_L, s_R, & + & s_M, s_P, s_S, xi_M, xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP]', & + & firstprivate='[Re_size_loc1, Re_size_loc2]') do l = ${Z_BND}$%beg, ${Z_BND}$%end do k = ${Y_BND}$%beg, ${Y_BND}$%end do j = ${X_BND}$%beg, ${X_BND}$%end @@ -212,69 +217,25 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + qL_prim_rsx_vf(${SF('')}$, i) - gamma_L = gamma_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rsx_vf(${SF('')}$, i)*qvs(i) - - rho_R = rho_R + qR_prim_rsx_vf(${SF(' + 1')}$, i) - gamma_R = gamma_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)*qvs(i) - + alpha_rho_L(i) = qL_prim_rsx_vf(${SF('')}$, i) + alpha_rho_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, i) alpha_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%adv%beg + i - 1) alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%adv%beg + i - 1) end do + call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_L, rho_L, gamma_L, pi_inf_L, & + & qv_L) + call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_R, rho_R, gamma_R, pi_inf_R, & + & qv_R) + if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - if (merge(Re_size_loc1, Re_size_loc2, i == 1) > 0) Re_L(i) = 0._wp - if (merge(Re_size_loc1, Re_size_loc2, i == 1) > 0) Re_R(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, merge(Re_size_loc1, Re_size_loc2, i == 1) - Re_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + Re_idx(i, q))/Res_gs(i, q) + Re_L(i) - Re_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + Re_idx(i, q))/Res_gs(i, & - & q) + Re_R(i) - end do - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do + call s_compute_interface_reynolds(alpha_L, Re_L, Re_size_loc1, Re_size_loc2) + call s_compute_interface_reynolds(alpha_R, Re_R, Re_size_loc1, Re_size_loc2) end if E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R - ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i) - tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i) - end do - G_L = 0._wp; G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - ! Elastic contribution to energy if G large enough - if ((G_L > verysmall) .and. (G_R > verysmall)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Additional terms in 2D and 3D - if ((i == 2) .or. (i == 4) .or. (i == 5)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if - end if - end do - end if - ! Hyperelastic stress contribution: strain energy added to total energy if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') @@ -485,17 +446,6 @@ contains flux_src_rsx_vf(${SF('')}$, eqn_idx%adv%beg) = vel_src_rsx_vf(${SF('')}$, dir_idx(1)) - ! HYPOELASTIC STRESS EVOLUTION FLUX. - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - flux_rsx_vf(${SF('')}$, & - & eqn_idx%stress%beg - 1 + i) = xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) & - & - rho_L*vel_L(dir_idx(1))*tau_e_L(i)) + xi_P*(s_S/(s_R - s_S)) & - & *(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(dir_idx(1))*tau_e_R(i)) - end do - end if - ! Hyperelastic reference map flux for material deformation tracking if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') @@ -598,18 +548,10 @@ contains alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) - end do + call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_L, rho_L, gamma_L, pi_inf_L, & + & qv_L) + call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_R, rho_R, gamma_R, pi_inf_R, & + & qv_R) pres_L = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E) pres_R = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E) @@ -742,12 +684,10 @@ contains end do ! Recalculating the radial momentum geometric source flux flux_gsrc_rsx_vf(${SF('')}$, & - & eqn_idx%cont%end + dir_idx(1)) = xi_M*(rho_L*(vel_L(dir_idx(1)) & - & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + & eqn_idx%cont%end + dir_idx(1)) & + & = f_compute_hllc_star_momentum_flux(rho_L, rho_R, vel_L(dir_idx(1)), & + & vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, xi_R, xi_M, xi_P, & + & dir_flg(dir_idx(1))) ! Geometrical source of the void fraction(s) is zero $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end @@ -762,12 +702,9 @@ contains flux_gsrc_rsx_vf(${SF('')}$, i) = 0._wp end do flux_gsrc_rsx_vf(${SF('')}$, & - & eqn_idx%mom%beg + 1) = -xi_M*(rho_L*(vel_L(dir_idx(1))*vel_L(dir_idx(1) & - & ) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & - xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + & eqn_idx%mom%beg + 1) = -f_compute_hllc_star_momentum_flux(rho_L, & + & rho_R, vel_L(dir_idx(1)), vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, & + & xi_R, xi_M, xi_P, dir_flg(dir_idx(1))) flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%mom%end) = flux_rsx_vf(${SF('')}$, eqn_idx%mom%beg + 1) end if #:endif @@ -778,13 +715,14 @@ contains else if (model_eqns == model_eqns_5eq .and. bubbles_euler) then ! 5-equation model with Euler-Euler bubble dynamics $:GPU_PARALLEL_LOOP(collapse=3, private='[i, q, R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, & - & vel_R, rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, Re_L, Re_R, pcorr, zcoef, rho_L, & - & rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, & - & qv_R, qv_avg, c_L, c_R, c_avg, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, vel_R_tmp, & - & Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, s_L, s_R, s_M, s_P, s_S, xi_M, & - & xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP, nbub_L, nbub_R, PbwR3Lbar, PbwR3Rbar, & - & R3Lbar, R3Rbar, R3V2Lbar, R3V2Rbar, Ys_L, Ys_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, & - & Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2]', firstprivate='[Re_size_loc1, Re_size_loc2]') + & vel_R, rho_avg, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, h_avg, gamma_avg, Re_L, & + & Re_R, pcorr, zcoef, rho_L, rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, gamma_L, gamma_R, & + & pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, c_avg, vel_L_rms, vel_R_rms, & + & vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, & + & alpha_R_sum, s_L, s_R, s_M, s_P, s_S, xi_M, xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, & + & xi_PP, nbub_L, nbub_R, PbwR3Lbar, PbwR3Rbar, R3Lbar, R3Rbar, R3V2Lbar, R3V2Rbar, Ys_L, & + & Ys_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, & + & h_avg_2]', firstprivate='[Re_size_loc1, Re_size_loc2]') do l = ${Z_BND}$%beg, ${Z_BND}$%end do k = ${Y_BND}$%beg, ${Y_BND}$%end do j = ${X_BND}$%beg, ${X_BND}$%end @@ -796,6 +734,8 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids + alpha_rho_L(i) = qL_prim_rsx_vf(${SF('')}$, i) + alpha_rho_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, i) alpha_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) end do @@ -812,29 +752,15 @@ contains ! Retain this in the refactor if (mpp_lim .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + qL_prim_rsx_vf(${SF('')}$, i) - gamma_L = gamma_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rsx_vf(${SF('')}$, i)*qvs(i) - rho_R = rho_R + qR_prim_rsx_vf(${SF(' + 1')}$, i) - gamma_R = gamma_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)*qvs(i) - end do + call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_L, rho_L, gamma_L, & + & pi_inf_L, qv_L) + call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_R, rho_R, gamma_R, & + & pi_inf_R, qv_R) else if (num_fluids > 2) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_L = rho_L + qL_prim_rsx_vf(${SF('')}$, i) - gamma_L = gamma_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rsx_vf(${SF('')}$, i)*qvs(i) - rho_R = rho_R + qR_prim_rsx_vf(${SF(' + 1')}$, i) - gamma_R = gamma_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)*qvs(i) - end do + call s_accumulate_mixture_properties(num_fluids - 1, alpha_rho_L, alpha_L, rho_L, gamma_L, & + & pi_inf_L, qv_L) + call s_accumulate_mixture_properties(num_fluids - 1, alpha_rho_R, alpha_R, rho_R, gamma_R, & + & pi_inf_R, qv_R) else rho_L = qL_prim_rsx_vf(${SF('')}$, 1) gamma_L = gammas(1) @@ -1137,12 +1063,10 @@ contains end do ! Recalculating the radial momentum geometric source flux flux_gsrc_rsx_vf(${SF('')}$, & - & eqn_idx%cont%end + dir_idx(1)) = xi_M*(rho_L*(vel_L(dir_idx(1)) & - & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + & eqn_idx%cont%end + dir_idx(1)) & + & = f_compute_hllc_star_momentum_flux(rho_L, rho_R, vel_L(dir_idx(1)), & + & vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, xi_R, xi_M, xi_P, & + & dir_flg(dir_idx(1))) ! Geometrical source of the void fraction(s) is zero $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end @@ -1158,12 +1082,9 @@ contains end do flux_gsrc_rsx_vf(${SF('')}$, & - & eqn_idx%mom%beg + 1) = -xi_M*(rho_L*(vel_L(dir_idx(1))*vel_L(dir_idx(1) & - & ) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & - xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + & eqn_idx%mom%beg + 1) = -f_compute_hllc_star_momentum_flux(rho_L, & + & rho_R, vel_L(dir_idx(1)), vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, & + & xi_R, xi_M, xi_P, dir_flg(dir_idx(1))) flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%mom%end) = flux_rsx_vf(${SF('')}$, eqn_idx%mom%beg + 1) end if #:endif @@ -1173,15 +1094,15 @@ contains $:END_GPU_PARALLEL_LOOP() else ! 5-equation model (model_eqns=2): mixture total energy, volume fraction advection - $:GPU_PARALLEL_LOOP(collapse=3, private='[Re_max, i, q, T_L, T_R, vel_L_rms, vel_R_rms, pres_L, pres_R, & - & rho_L, gamma_L, pi_inf_L, qv_L, rho_R, gamma_R, pi_inf_R, qv_R, alpha_L_sum, & - & alpha_R_sum, E_L, E_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, & - & Gamm_R, Y_L, Y_R, H_L, H_R, qv_avg, rho_avg, gamma_avg, H_avg, c_L, c_R, c_avg, s_P, & - & s_M, xi_P, xi_M, xi_L, xi_R, xi_L_m1, xi_R_m1, Ms_L, Ms_R, pres_SL, pres_SR, vel_L, & - & vel_R, Re_L, Re_R, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, & - & vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, & - & tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, G_L, & - & G_R, c_sum_Yi_Phi, flux_ene_e]', copyin='[is1, is2, is3]', firstprivate='[Re_size_loc1, Re_size_loc2]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i, T_L, T_R, vel_L_rms, vel_R_rms, pres_L, pres_R, rho_L, gamma_L, & + & pi_inf_L, qv_L, rho_R, gamma_R, pi_inf_R, qv_R, alpha_L_sum, alpha_R_sum, E_L, E_R, & + & MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, Y_L, Y_R, H_L, & + & H_R, qv_avg, rho_avg, gamma_avg, H_avg, c_L, c_R, c_avg, s_P, s_M, xi_P, xi_M, xi_L, & + & xi_R, xi_L_m1, xi_R_m1, Ms_L, Ms_R, pres_SL, pres_SR, vel_L, vel_R, Re_L, Re_R, & + & alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, alpha_lim_L, alpha_lim_R, s_L, s_R, s_S, & + & vel_avg_rms, pcorr, zcoef, vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, & + & Gamma_iR, Cp_iL, Cp_iR, tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg, Phi_avg, & + & h_iL, h_iR, h_avg_2, G_L, G_R]', copyin='[is1, is2, is3]', firstprivate='[Re_size_loc1, Re_size_loc2]') do l = ${Z_BND}$%beg, ${Z_BND}$%end do k = ${Y_BND}$%beg, ${Y_BND}$%end do j = ${X_BND}$%beg, ${X_BND}$%end @@ -1232,38 +1153,24 @@ contains end do end if + ! Post-limiter loads for the mixture properties; alpha_L/R keep the pre-limiter loads used + ! downstream $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + qL_prim_rsx_vf(${SF('')}$, i) - gamma_L = gamma_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rsx_vf(${SF('')}$, i)*qvs(i) - - rho_R = rho_R + qR_prim_rsx_vf(${SF(' + 1')}$, i) - gamma_R = gamma_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)*qvs(i) + alpha_rho_L(i) = qL_prim_rsx_vf(${SF('')}$, i) + alpha_rho_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, i) + alpha_lim_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) + alpha_lim_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) end do - Re_max = 0 - if (Re_size_loc1 > 0) Re_max = 1 - if (Re_size_loc2 > 0) Re_max = 2 + call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_lim_L, rho_L, gamma_L, & + & pi_inf_L, qv_L) + call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_lim_R, rho_R, gamma_R, & + & pi_inf_R, qv_R) if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, Re_max - Re_L(i) = 0._wp - Re_R(i) = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, merge(Re_size_loc1, Re_size_loc2, i == 1) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i) - end do - - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do + call s_compute_interface_reynolds(alpha_L, Re_L, Re_size_loc1, Re_size_loc2) + call s_compute_interface_reynolds(alpha_R, Re_R, Re_size_loc1, Re_size_loc2) end if if (chemistry) then @@ -1322,35 +1229,6 @@ contains H_R = (E_R + pres_R)/rho_R end if - ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i) - tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i) - end do - G_L = 0._wp - G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - ! Elastic contribution to energy if G large enough - if ((G_L > verysmall) .and. (G_R > verysmall)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Additional terms in 2D and 3D - if ((i == 2) .or. (i == 4) .or. (i == 5)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if - end if - end do - end if - ! Hyperelastic stress contribution: strain energy added to total energy if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') @@ -1520,17 +1398,6 @@ contains flux_rsx_vf(${SF('')}$, eqn_idx%E) = flux_rsx_vf(${SF('')}$, eqn_idx%E) + flux_ene_e end if - ! HYPOELASTIC STRESS EVOLUTION FLUX. - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - flux_rsx_vf(${SF('')}$, & - & eqn_idx%stress%beg - 1 + i) = xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) & - & - rho_L*vel_L(dir_idx(1))*tau_e_L(i)) + xi_P*(s_S/(s_R - s_S)) & - & *(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(dir_idx(1))*tau_e_R(i)) - end do - end if - ! VOLUME FRACTION FLUX. $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end @@ -1590,12 +1457,10 @@ contains end do ! Recalculating the radial momentum geometric source flux flux_gsrc_rsx_vf(${SF('')}$, & - & eqn_idx%cont%end + dir_idx(1)) = xi_M*(rho_L*(vel_L(dir_idx(1)) & - & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + & eqn_idx%cont%end + dir_idx(1)) & + & = f_compute_hllc_star_momentum_flux(rho_L, rho_R, vel_L(dir_idx(1)), & + & vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, xi_R, xi_M, xi_P, & + & dir_flg(dir_idx(1))) ! Geometrical source of the void fraction(s) is zero $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end @@ -1611,12 +1476,9 @@ contains end do flux_gsrc_rsx_vf(${SF('')}$, & - & eqn_idx%mom%beg + 1) = -xi_M*(rho_L*(vel_L(dir_idx(1))*vel_L(dir_idx(1) & - & ) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & - xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & - & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + & eqn_idx%mom%beg + 1) = -f_compute_hllc_star_momentum_flux(rho_L, & + & rho_R, vel_L(dir_idx(1)), vel_R(dir_idx(1)), s_M, s_P, s_S, xi_L, & + & xi_R, xi_M, xi_P, dir_flg(dir_idx(1))) flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%mom%end) = flux_rsx_vf(${SF('')}$, eqn_idx%mom%beg + 1) end if #:endif diff --git a/src/simulation/m_riemann_solver_lf.fpp b/src/simulation/m_riemann_solver_lf.fpp index f0df301a25..8e31937199 100644 --- a/src/simulation/m_riemann_solver_lf.fpp +++ b/src/simulation/m_riemann_solver_lf.fpp @@ -35,7 +35,6 @@ contains ! Intercell fluxes type(scalar_field), dimension(sys_size), intent(inout) :: flux_vf, flux_src_vf, flux_gsrc_vf - real(wp) :: flux_tau_L, flux_tau_R integer, intent(in) :: norm_dir type(int_bounds_info), intent(in) :: ix, iy, iz @@ -73,10 +72,7 @@ contains real(wp) :: pi_inf_L, pi_inf_R real(wp) :: qv_L, qv_R real(wp) :: c_L, c_R - real(wp), dimension(6) :: tau_e_L, tau_e_R - real(wp) :: G_L, G_R real(wp), dimension(2) :: Re_L, Re_R - real(wp), dimension(3) :: xi_field_L, xi_field_R real(wp) :: rho_avg real(wp) :: H_avg real(wp) :: gamma_avg @@ -88,16 +84,16 @@ contains real(wp) :: vel_L_tmp, vel_R_tmp real(wp) :: Ms_L, Ms_R, pres_SL, pres_SR real(wp) :: alpha_L_sum, alpha_R_sum - real(wp) :: zcoef, pcorr !< low Mach number correction + real(wp) :: zcoef, pcorr !< low Mach number correction type(riemann_states) :: c_fast, pres_mag type(riemann_states_vec3) :: B - type(riemann_states) :: Ga !< Gamma (Lorentz factor) + type(riemann_states) :: Ga !< Gamma (Lorentz factor) type(riemann_states) :: vdotB, B2 - type(riemann_states_vec3) :: b4 !< 4-magnetic field components (spatial: b4x, b4y, b4z) - type(riemann_states_vec3) :: cm !< Conservative momentum variables - integer :: i, j, k, l, q !< Generic loop iterators + type(riemann_states_vec3) :: b4 !< 4-magnetic field components (spatial: b4x, b4y, b4z) + type(riemann_states_vec3) :: cm !< Conservative momentum variables + integer :: i, j, k, l !< Generic loop iterators + integer :: Re_size_loc1, Re_size_loc2 !< host copies of Re_size; amdflang reads the declare-target original stale cross-TU integer, dimension(3) :: idx_right_phys !< Physical (j,k,l) indices for right state. - integer :: Re_size_loc1, Re_size_loc2 !< host copy of Re_size; amdflang reads the declare-target original stale cross-TU ! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, & @@ -113,16 +109,15 @@ contains #:set SV = STENCIL_VAR #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) if (norm_dir == ${NORM_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, q, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, & - & alpha_R, tau_e_L, tau_e_R, G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, & - & Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, & - & Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, & - & vel_grad_L, vel_grad_R, idx_right_phys, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, & - & vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, c_avg, pres_L, pres_R, & - & rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, c_R, E_L, E_R, H_L, & - & H_R, ptilde_L, ptilde_R, s_M, s_P, xi_M, xi_P, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, & - & Cp_L, Cp_R, Cv_L, Cv_R, R_gas_L, R_gas_R, MW_L, MW_R, T_L, T_R, Y_L, Y_R, Gamm_L, Gamm_R, & - & flux_tau_L, flux_tau_R]', firstprivate='[Re_size_loc1, Re_size_loc2]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, & + & Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, Cp_iL, Cp_iR, Xs_L, Xs_R, & + & Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, & + & B2, b4, cm, pcorr, zcoef, vel_grad_L, vel_grad_R, idx_right_phys, vel_L_rms, vel_R_rms, & + & vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, & + & c_avg, pres_L, pres_R, rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, & + & c_R, E_L, E_R, H_L, H_R, ptilde_L, ptilde_R, s_M, s_P, xi_M, xi_P, Cp_avg, Cv_avg, T_avg, & + & eps, c_sum_Yi_Phi, Cp_L, Cp_R, Cv_L, Cv_R, R_gas_L, R_gas_R, MW_L, MW_R, T_L, T_R, Y_L, & + & Y_R]', firstprivate='[Re_size_loc1, Re_size_loc2]') do l = ${Z_BND}$%beg, ${Z_BND}$%end do k = ${Y_BND}$%beg, ${Y_BND}$%end do j = ${X_BND}$%beg, ${X_BND}$%end @@ -200,37 +195,12 @@ contains alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) - end do + call s_accumulate_mixture_properties(num_fluids, alpha_rho_L, alpha_L, rho_L, gamma_L, pi_inf_L, qv_L) + call s_accumulate_mixture_properties(num_fluids, alpha_rho_R, alpha_R, rho_R, gamma_R, pi_inf_R, qv_R) if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - - if (merge(Re_size_loc1, Re_size_loc2, i == 1) > 0) Re_L(i) = 0._wp - if (merge(Re_size_loc1, Re_size_loc2, i == 1) > 0) Re_R(i) = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, merge(Re_size_loc1, Re_size_loc2, i == 1) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i) - end do - - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do + call s_compute_interface_reynolds(alpha_L, Re_L, Re_size_loc1, Re_size_loc2) + call s_compute_interface_reynolds(alpha_R, Re_R, Re_size_loc1, Re_size_loc2) end if if (chemistry) then @@ -322,37 +292,6 @@ contains H_R = (E_R + pres_R)/rho_R end if - ! elastic energy update - if (hypoelasticity) then - G_L = 0._wp; G_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - - if (cont_damage) then - G_L = G_L*max((1._wp - qL_prim_rsx_vf(${SF('')}$, eqn_idx%damage)), 0._wp) - G_R = G_R*max((1._wp - qR_prim_rsx_vf(${SF('')}$, eqn_idx%damage)), 0._wp) - end if - - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i) - tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i) - ! Elastic contribution to energy if G large enough TODO take out if statement if stable without - if ((G_L > 1000) .and. (G_R > 1000)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Double for shear stresses - if (any(eqn_idx%stress%beg - 1 + i == shear_indices)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if - end if - end do - end if - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, vel_L_rms, 0._wp, c_L, & & qv_L) @@ -439,16 +378,6 @@ contains & + s_M*s_P*(rho_L*vel_L(dir_idx(i)) - rho_R*vel_R(dir_idx(i))))/(s_M - s_P) & & + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) end do - else if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rsx_vf(${SF('')}$, & - & eqn_idx%cont%end + dir_idx(i)) = (s_M*(rho_R*vel_R(dir_idx(1))*vel_R(dir_idx(i)) & - & + dir_flg(dir_idx(i))*pres_R - tau_e_R(dir_idx_tau(i))) & - & - s_P*(rho_L*vel_L(dir_idx(1))*vel_L(dir_idx(i)) + dir_flg(dir_idx(i))*pres_L & - & - tau_e_L(dir_idx_tau(i))) + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - & - rho_R*vel_R(dir_idx(i))))/(s_M - s_P) - end do else $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels @@ -482,17 +411,6 @@ contains & eqn_idx%E) = (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) - s_P*vel_L(dir_idx(1) & & )*(E_L + pres_L - ptilde_L) + s_M*s_P*(E_L - E_R))/(s_M - s_P) + (s_M/s_L)*(s_P/s_R) & & *pcorr*(vel_R_rms - vel_L_rms)/2._wp - else if (hypoelasticity) then - flux_tau_L = 0._wp; flux_tau_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) - flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) - end do - flux_rsx_vf(${SF('')}$, & - & eqn_idx%E) = (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & - & - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) + s_M*s_P*(E_L - E_R))/(s_M & - & - s_P) else flux_rsx_vf(${SF('')}$, & & eqn_idx%E) = (s_M*vel_R(dir_idx(1))*(E_R + pres_R) - s_P*vel_L(dir_idx(1))*(E_L & @@ -500,16 +418,6 @@ contains & - vel_L_rms)/2._wp end if - ! Elastic Stresses - if (hypoelasticity) then - do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 ! TODO: this indexing may be slow - flux_rsx_vf(${SF('')}$, & - & eqn_idx%stress%beg - 1 + i) = (s_M*(rho_R*vel_R(dir_idx(1))*tau_e_R(i)) & - & - s_P*(rho_L*vel_L(dir_idx(1))*tau_e_L(i)) + s_M*s_P*(rho_L*tau_e_L(i) & - & - rho_R*tau_e_R(i)))/(s_M - s_P) - end do - end if - ! Advection flux and source: interface velocity for volume fraction transport $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end @@ -581,17 +489,6 @@ contains flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i) end do end if - - if (cyl_coord .and. hypoelasticity) then - ! += tau_sigmasigma using HLL - flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%cont%end + 2) = flux_gsrc_rsx_vf(${SF('')}$, & - & eqn_idx%cont%end + 2) + (s_M*tau_e_R(4) - s_P*tau_e_L(4))/(s_M - s_P) - - $:GPU_LOOP(parallelism='[seq]') - do i = eqn_idx%stress%beg, eqn_idx%stress%end - flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i) - end do - end if #:endif end do end do @@ -648,23 +545,8 @@ contains end do end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - - if (merge(Re_size_loc1, Re_size_loc2, i == 1) > 0) Re_L(i) = 0._wp - if (merge(Re_size_loc1, Re_size_loc2, i == 1) > 0) Re_R(i) = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, merge(Re_size_loc1, Re_size_loc2, i == 1) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i) - end do - - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do + call s_compute_interface_reynolds(alpha_L, Re_L, Re_size_loc1, Re_size_loc2) + call s_compute_interface_reynolds(alpha_R, Re_R, Re_size_loc1, Re_size_loc2) if (shear_stress) then $:GPU_LOOP(parallelism='[seq]') diff --git a/src/simulation/m_riemann_state.fpp b/src/simulation/m_riemann_state.fpp index 595f785d39..d9a53ad491 100644 --- a/src/simulation/m_riemann_state.fpp +++ b/src/simulation/m_riemann_state.fpp @@ -11,7 +11,7 @@ module m_riemann_state use m_derived_types use m_global_parameters - use m_constants, only: riemann_solver_hll, riemann_solver_hlld + use m_constants, only: riemann_solver_hll, riemann_solver_hlld, verysmall use m_hb_function implicit none @@ -1016,6 +1016,133 @@ contains end subroutine s_calculate_bulk_stress_tensor + !> Accumulate the mixture density, specific heat ratio function, liquid stiffness function, and internal energy reference of one + !! Riemann state from its partial densities and volume fractions. The number of fluids is an explicit argument because the + !! 5-equation bubble model accumulates over num_fluids - 1 fluids. + subroutine s_accumulate_mixture_properties(nf, alpha_rho_K, alpha_K, rho_K, gamma_K, pi_inf_K, qv_K) + + $:GPU_ROUTINE(function_name='s_accumulate_mixture_properties', parallelism='[seq]', cray_inline=True) + + integer, intent(in) :: nf !< Number of fluids to accumulate over + real(wp), dimension(nf), intent(in) :: alpha_rho_K, alpha_K + real(wp), intent(out) :: rho_K, gamma_K, pi_inf_K, qv_K + integer :: i !< Loop iterator over fluids + + rho_K = 0._wp + gamma_K = 0._wp + pi_inf_K = 0._wp + qv_K = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nf + rho_K = rho_K + alpha_rho_K(i) + gamma_K = gamma_K + alpha_K(i)*gammas(i) + pi_inf_K = pi_inf_K + alpha_K(i)*pi_infs(i) + qv_K = qv_K + alpha_rho_K(i)*qvs(i) + end do + + end subroutine s_accumulate_mixture_properties + + !> Compute the shear and volume Reynolds numbers of one Riemann state by inverse-weighting the fluid Reynolds numbers with the + !! volume fractions. + subroutine s_compute_interface_reynolds(alpha_K, Re_K, Re_size_loc1, Re_size_loc2) + + $:GPU_ROUTINE(function_name='s_compute_interface_reynolds', parallelism='[seq]', cray_inline=True) + + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: alpha_K + #:else + real(wp), dimension(num_fluids), intent(in) :: alpha_K + #:endif + real(wp), dimension(2), intent(out) :: Re_K + !> host copies of Re_size; amdflang reads the declare-target original stale cross-TU + integer, intent(in) :: Re_size_loc1, Re_size_loc2 + integer :: i, q !< Loop iterators + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_K(i) = dflt_real + + if (merge(Re_size_loc1, Re_size_loc2, i == 1) > 0) Re_K(i) = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = 1, merge(Re_size_loc1, Re_size_loc2, i == 1) + Re_K(i) = alpha_K(Re_idx(i, q))/Res_gs(i, q) + Re_K(i) + end do + + Re_K(i) = 1._wp/max(Re_K(i), sgm_eps) + end do + + end subroutine s_compute_interface_reynolds + + !> Accumulate the hypoelastic stress contribution to the energies of the left and right Riemann states: mix the shear modulus + !! over the fluids, scale it by the continuum damage state when damage is modeled, and add the elastic energy of each stress + !! component (doubled for the shear components) when both mixture moduli are non-negligible. The elastic shear stresses are + !! loaded from the state buffers by the caller, which reuses them for the stress fluxes and elastic wave speeds. The G > + !! verysmall gate is a deliberate maintainer ruling that replaces HLL's former hard-coded G > 1000 stability floor, retiring its + !! "TODO take out if statement if stable without". + subroutine s_compute_hypoelastic_interface_energy(nf, alpha_L, alpha_R, damage_L, damage_R, tau_e_L, tau_e_R, G_L, G_R, E_L, & + & E_R) + + $:GPU_ROUTINE(function_name='s_compute_hypoelastic_interface_energy', parallelism='[seq]', cray_inline=True) + + integer, intent(in) :: nf !< Number of fluids to mix the shear modulus over + real(wp), dimension(nf), intent(in) :: alpha_L, alpha_R !< Left and right volume fractions + real(wp), intent(in) :: damage_L, damage_R !< Continuum damage states (referenced only when cont_damage) + real(wp), dimension(6), intent(in) :: tau_e_L, tau_e_R !< Left and right elastic shear stresses + real(wp), intent(out) :: G_L, G_R !< Left and right mixture shear moduli + real(wp), intent(inout) :: E_L, E_R !< Left and right state energies + integer :: i !< Loop iterator + + G_L = 0._wp; G_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nf + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do + + if (cont_damage) then + G_L = G_L*max((1._wp - damage_L), 0._wp) + G_R = G_R*max((1._wp - damage_R), 0._wp) + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 + ! Elastic contribution to energy if G large enough + if ((G_L > verysmall) .and. (G_R > verysmall)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Double for shear stresses + if (any(eqn_idx%stress%beg - 1 + i == shear_indices)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + end if + end if + end do + + end subroutine s_compute_hypoelastic_interface_energy + + !> Compute the advective part of the HLLC star-state momentum flux in the wave-normal direction (pressure excluded), used to + !! assemble the geometrical source flux of the cylindrical and azimuthal sweeps. + function f_compute_hllc_star_momentum_flux(rho_L, rho_R, vel_L_norm, vel_R_norm, s_M, s_P, s_S, xi_L, xi_R, xi_M, xi_P, & + & dir_flg_norm) result(flux_mom) + + $:GPU_ROUTINE(function_name='f_compute_hllc_star_momentum_flux', parallelism='[seq]', cray_inline=True) + + real(wp), intent(in) :: rho_L, rho_R !< Left and right densities + real(wp), intent(in) :: vel_L_norm, vel_R_norm !< Left and right wave-normal velocities + real(wp), intent(in) :: s_M, s_P, s_S !< Clamped left/right and contact wave speeds + real(wp), intent(in) :: xi_L, xi_R, xi_M, xi_P !< Star-state compression factors and upwind selectors + real(wp), intent(in) :: dir_flg_norm !< Direction flag of the wave-normal direction + real(wp) :: flux_mom + + flux_mom = xi_M*(rho_L*(vel_L_norm*vel_L_norm + s_M*(xi_L*(dir_flg_norm*s_S + (1._wp - dir_flg_norm)*vel_L_norm) & + & - vel_L_norm))) + xi_P*(rho_R*(vel_R_norm*vel_R_norm + s_P*(xi_R*(dir_flg_norm*s_S + (1._wp & + & - dir_flg_norm)*vel_R_norm) - vel_R_norm))) + + end function f_compute_hllc_star_momentum_flux + !> Deallocation and/or disassociation procedures that are needed to finalize the selected Riemann problem solver subroutine s_finalize_riemann_solver(flux_vf, flux_src_vf, flux_gsrc_vf, norm_dir) diff --git a/toolchain/bench.yaml b/toolchain/bench.yaml index d09d9863e3..617d9138f3 100644 --- a/toolchain/bench.yaml +++ b/toolchain/bench.yaml @@ -8,6 +8,26 @@ path: benchmarks/5eq_rk3_weno3_hllc/case.py args: [] +# Benchmark model_equations_2_time_stepper_3_weno_order_3_riemann_solver_hll +# Additional Benchmarked Features +# - model_equations : 2 +# - time_stepper : 3 +# - weno_order : 3 +# - riemann_solver : hll +- slug: 5eq_rk3_weno3_hll + path: benchmarks/5eq_rk3_weno3_hll/case.py + args: [] + +# Benchmark model_equations_2_time_stepper_3_weno_order_3_riemann_solver_lax_friedrichs +# Additional Benchmarked Features +# - model_equations : 2 +# - time_stepper : 3 +# - weno_order : 3 +# - riemann_solver : lax_friedrichs +- slug: 5eq_rk3_weno3_lf + path: benchmarks/5eq_rk3_weno3_lf/case.py + args: [] + # Benchmark viscosity_weno_Re_flux_T_weno_order_5_bubbles_T_bubble_mode_3_acoustic_T # Additional Benchmarked Features # - viscosity enabled