Encountered torch tensor shape issue when trying to solve a transient problem

nihalpushkar11 · July 11, 2023, 4:46pm

Hi,
I am interested to simulate a problem where I am applying an time varied extension at one end of the body and trying to visualize the stress generated as a function of time on the same.

The following is my code for the same (using inbuilt Linear elasticity problem):

from sympy import Symbol, Eq
import numpy as np

import modulus.sym
from modulus.sym.hydra import instantiate_arch, ModulusConfig
from modulus.sym.solver import Solver
from modulus.sym.domain import Domain
from modulus.sym.geometry.primitives_2d import Rectangle, Circle
from modulus.sym.domain.constraint import (
    PointwiseBoundaryConstraint,
    PointwiseInteriorConstraint,
)
from modulus.sym.domain.validator import PointwiseValidator
from modulus.sym.domain.inferencer import PointwiseInferencer
from modulus.sym.key import Key
from modulus.sym.node import Node
from modulus.sym.eq.pdes.linear_elasticity import LinearElasticityPlaneStress

@modulus.sym.main(config_path="conf", config_name="config")
def run(cfg: ModulusConfig) -> None:
    # specify Panel properties
    E = 200.0 * 10**9  # Pa
    nu = 0.33
    lambda_ = nu * E / ((1 + nu) * (1 - 2 * nu))  # Pa
    mu_real = E / (2 * (1 + nu))  # Pa
    lambda_ = lambda_ / mu_real  # Dimensionless
    mu = 1.0  # Dimensionless

    # make list of nodes to unroll graph on
    le = LinearElasticityPlaneStress(lambda_=lambda_, mu = mu, rho = 1)
    elasticity_net = instantiate_arch(
        input_keys=[
            Key("x"), 
            Key("y"),
            Key('t'),
            ],
        output_keys=[
            Key("u"),
            Key("v"),
            Key("sigma_xx"),
            Key("sigma_yy"),
            Key("sigma_xy"),
        ],
        cfg=cfg.arch.fully_connected,
    )
    nodes = le.make_nodes() + [elasticity_net.make_node(name="elasticity_network")]

    # Generate the sample geometry
    # coordinates for working field
    x, y = Symbol("x"), Symbol("y")
    panel_origin = (-.02, -.055) # (m, m) left bottom corner
    panel_dim = (0.04, 0.11) # (m, m)
    circle_radius = 0.005 # (m)
    circle1_origin = (-.02, -.01) # (m, m)
    circle2_origin = (.02, .01) # (m, m)
    panel = Rectangle(panel_origin, (panel_origin[0] + panel_dim[0], panel_origin[1] + panel_dim[1]))
    c1 = Circle(circle1_origin, circle_radius)
    c2 = Circle(circle2_origin, circle_radius)
    thckness = 3e-3 #m

    # time 
    t = Symbol('t')
    
    # bound on time scale
    bounds_t = (0, 1)
    time_range = {t: bounds_t}

    # final geometry after remove those circle form the rectangular panel
    geo = panel - c1 - c2

    # bounds on the geometry
    bounds_x = (panel_origin[0], panel_origin[0] + panel_dim[0])
    bounds_y = (panel_origin[1], panel_origin[1] + panel_dim[1])

    # DOMAIN
    domain = Domain()

    # Boundary conditions and Initial Conditions
    # left wall: no force / tractions = free surface
    panel_left_1 = PointwiseBoundaryConstraint(
            nodes=nodes,
            geometry=geo,
            outvar={"traction_x": 0.0, "traction_y": 0.0},
            batch_size = cfg.batch_size.panel_left,
            criteria = Eq(x, panel_origin[0]), # equations to be need to defined here of constarints
            parameterization=time_range, # t is a sympy symbol!!
            batch_per_epoch=500,
        )
    domain.add_constraint(panel_left_1, "panel_left_1")

    panel_left_2 = PointwiseBoundaryConstraint(
            nodes=nodes,
            geometry=geo,
            outvar={"traction_x": 0.0, "traction_y": 0.0},
            batch_size = cfg.batch_size.panel_left,
            criteria = Eq((x - circle1_origin[0])**2 + (y - circle1_origin[1])**2, circle_radius**2), # equations to be need to defined here of constarints
            parameterization=time_range, # t is a sympy symbol!!
            batch_per_epoch=500,
        )
    domain.add_constraint(panel_left_2, "panel_left_2")

    # right wall
    panel_right_1 = PointwiseBoundaryConstraint(
            nodes=nodes,
            geometry=geo,
            outvar={"traction_x": 0.0, "traction_y": 0.0},
            batch_size = cfg.batch_size.panel_right,
            criteria = Eq(x, panel_origin[0] + panel_dim[0]), # defining the equations (constraints)
            parameterization=time_range, # parametric terms
            batch_per_epoch=500,
        )
    domain.add_constraint(panel_right_1, "panel_right_1")

    panel_right_2 = PointwiseBoundaryConstraint(
            nodes=nodes,
            geometry=geo,
            outvar={"traction_x": 0.0, "traction_y": 0.0},
            batch_size = cfg.batch_size.panel_right,
            criteria = Eq((x - circle2_origin[0])**2 + (y - circle2_origin[1])**2, circle_radius**2), # defining the equations (constraints)
            parameterization=time_range,
            batch_per_epoch=500,
        )
    domain.add_constraint(panel_right_2, "panel_right_2")

    # bottom wall
    panel_bottom = PointwiseBoundaryConstraint(
            nodes=nodes,
            geometry=geo,
            outvar = {"v": 0.0},
            batch_size = cfg.batch_size.panel_bottom,
            criteria = Eq(y, panel_origin[1]),
            parameterization=time_range,
            batch_per_epoch=500,
        )
    domain.add_constraint(panel_bottom, "panel_bottom")

    # top wall
    # u_top = constant*t {the pseudo time constarint introduced here}
    panel_top = PointwiseBoundaryConstraint(
            nodes=nodes,
            geometry=geo,
            outvar={"v": 0.01*t},
            batch_size = cfg.batch_size.panel_top,
            criteria = Eq(y, panel_origin[1] + panel_dim[1]),
            parameterization=time_range,
            batch_per_epoch=500,
        )
    domain.add_constraint(panel_top, "panel_top")

    # inferenceer if required

    # constarints to be hold: interior
    LE = PointwiseInteriorConstraint(
        nodes=nodes,
        geometry=geo,
        outvar={
            "equilibrium_x": 0.0,
            "equilibrium_y": 0.0,
            "stress_disp_xx": 0.0,
            "stress_disp_yy": 0.0,
            "stress_disp_xy": 0.0,
        },
        batch_size=cfg.batch_size.lr_interior,
        bounds={x: bounds_x, y: bounds_y},
        lambda_weighting={
            "equilibrium_x": Symbol("sdf"),
            "equilibrium_y": Symbol("sdf"),
            "stress_disp_xx": Symbol("sdf"),
            "stress_disp_yy": Symbol("sdf"),
            "stress_disp_xy": Symbol("sdf"),
        },
        parameterization=time_range,
        batch_per_epoch=500,
    )
    domain.add_constraint(LE, "interior")

    # initial condition @t = 0 => no displacements
    IC = PointwiseInteriorConstraint(
        nodes = nodes,
        geometry = geo,
        outvar={"u": 0.0, "v":0.0},
        batch_size=cfg.batch_size.lr_interior,
        # criteria = Eq(t, bounds_t[0]),
        # bounds={x: bounds_x, y: bounds_y},
        lambda_weighting={"u": Symbol("sdf"), "v":Symbol("sdf")},
        parameterization=time_range,
        batch_per_epoch=500,
    )
    domain.add_constraint(IC, "IC")

    for i, specific_time in enumerate(np.linspace(0, bounds_t[1], 10)):
        invar_numpy = geo.sample_interior(
            100000,
            bounds={x: bounds_x, y: bounds_y},
            parameterization={t: specific_time},
        )
        grid_inference = PointwiseInferencer(
            nodes=nodes,
            invar=invar_numpy,
            output_names=["u", "v", "sigma_xx", "sigma_yy", "sigma_xy"],
            batch_size=4096,
        )
        domain.add_inferencer(grid_inference, name="time_slice_" + str(i+1).zfill(3))

    # make solver
    slv = Solver(cfg, domain)

    # start solver
    slv.solve()


if __name__ == "__main__":
    run()

But I am encountering the issue below:

  ret = run_job(
[22:05:23] - JIT using the NVFuser TorchScript backend
[22:05:23] - JitManager: {'_enabled': True, '_arch_mode': <JitArchMode.ONLY_ACTIVATION: 1>, '_use_nvfuser': True, '_autograd_nodes': False}
[22:05:23] - GraphManager: {'_func_arch': False, '_debug': False, '_func_arch_allow_partial_hessian': True}
[22:05:33] - Installed PyTorch version 2.0.1+cu117 is not TorchScript supported in Modulus. Version 1.14.0a0+410ce96 is officially supported.
[22:05:33] - attempting to restore from: outputs/panel
[22:05:33] - Success loading optimizer: outputs/panel/optim_checkpoint.0.pth
[22:05:33] - Fail loading model: outputs/panel/elasticity_network.0.pth
Error executing job with overrides: []
Traceback (most recent call last):
  File "panel.py", line 211, in run
    slv.solve()
  File "/home/neo11/anaconda3/envs/py38/lib/python3.8/site-packages/modulus/sym/solver/solver.py", line 173, in solve
    self._train_loop(sigterm_handler)
  File "/home/neo11/anaconda3/envs/py38/lib/python3.8/site-packages/modulus/sym/trainer.py", line 535, in _train_loop
    loss, losses = self._cuda_graph_training_step(step)
  File "/home/neo11/anaconda3/envs/py38/lib/python3.8/site-packages/modulus/sym/trainer.py", line 722, in _cuda_graph_training_step
    self.apply_gradients()
  File "/home/neo11/anaconda3/envs/py38/lib/python3.8/site-packages/modulus/sym/trainer.py", line 86, in adam_apply_gradients
    self.scaler.step(self.optimizer)
  File "/home/neo11/anaconda3/envs/py38/lib/python3.8/site-packages/torch/cuda/amp/grad_scaler.py", line 315, in step
    return optimizer.step(*args, **kwargs)
  File "/home/neo11/anaconda3/envs/py38/lib/python3.8/site-packages/torch/optim/lr_scheduler.py", line 69, in wrapper
    return wrapped(*args, **kwargs)
  File "/home/neo11/anaconda3/envs/py38/lib/python3.8/site-packages/torch/optim/optimizer.py", line 280, in wrapper
    out = func(*args, **kwargs)
  File "/home/neo11/anaconda3/envs/py38/lib/python3.8/site-packages/torch/optim/optimizer.py", line 33, in _use_grad
    ret = func(self, *args, **kwargs)
  File "/home/neo11/anaconda3/envs/py38/lib/python3.8/site-packages/torch/optim/adam.py", line 141, in step
    adam(
  File "/home/neo11/anaconda3/envs/py38/lib/python3.8/site-packages/torch/optim/adam.py", line 281, in adam
    func(params,
  File "/home/neo11/anaconda3/envs/py38/lib/python3.8/site-packages/torch/optim/adam.py", line 446, in _multi_tensor_adam
    torch._foreach_add_(device_exp_avgs, device_grads, alpha=1 - beta1)
RuntimeError: The size of tensor a (2) must match the size of tensor b (3) at non-singleton dimension 1

I don’t understand the error well here, like what is the error telling me to do? what are “a” & “b” here? Though I dont face any issues in solving the static problem, thus making me curious why is this happening so in transient problem? is because of the introduction of time axis?

Thank you

ngeneva · July 14, 2023, 2:01am

Hi @nihalpushkar11

Looks like you’re failing to load a previous checkpoint:

[22:05:33] - Fail loading model: outputs/panel/elasticity_network.0.pth

Perhaps you updated your model architecture so the previous checkpoint is outdated? Try renaming / deleting you old checkpoint and restarting the run.

nihalpushkar11 · July 14, 2023, 3:31am

hi @ngeneva,
Thanks for the reply, deleting the same resolved the issue. But now encountered a new problem, the loss function is reaching Nans:

  ret = run_job(
[08:59:12] - JIT using the NVFuser TorchScript backend
[08:59:12] - JitManager: {'_enabled': True, '_arch_mode': <JitArchMode.ONLY_ACTIVATION: 1>, '_use_nvfuser': True, '_autograd_nodes': False}
[08:59:12] - GraphManager: {'_func_arch': False, '_debug': False, '_func_arch_allow_partial_hessian': True}
[08:59:32] - Installed PyTorch version 2.0.1+cu117 is not TorchScript supported in Modulus. Version 1.14.0a0+410ce96 is officially supported.
[08:59:32] - attempting to restore from: outputs/dm_panel
[08:59:32] - optimizer checkpoint not found
[08:59:32] - model damage_network.0.pth not found
[08:59:42] - loss went to Nans

could you please help me out with this, also please let me know if you need the scripts required to obtain this error, I would be happy to share it with you.

Thank you

ngeneva · July 20, 2023, 7:04am

Hi @nihalpushkar11

Unfortunately nans are quite more difficult to debug. I would suggest trying to single out which constraint is causing this issue. Once that’s identified then you can start to debug why this would be the case. It could also be something with the hyper-parameters your using, your loss calc, etc. Best of luck!

nihalpushkar11 · July 20, 2023, 8:03am

sorry for not mentioning earlier, i have resolved the issue by enumerating the constraints and isolated the error segment 2 days before, thanks for the reply.

system · August 3, 2023, 8:04am

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.

Topic		Replies	Views
Fluctuating loss function Technical Support (PhysicsNeMo Only)	2	852	July 28, 2023
Working with custom constraints: criteria and outvar Technical Support (PhysicsNeMo Only)	3	935	June 25, 2023
Add constraints and inferencers for Navier-Stokes equations (time=True) Technical Support (PhysicsNeMo Only)	4	1180	November 4, 2022
Dynamic Linear elasticity problem Technical Support (PhysicsNeMo Only)	1	680	July 14, 2023
Solving by adding a time derivative term in Lid Cavity Example Technical Support (PhysicsNeMo Only)	1	71	November 10, 2024
Applying Time Windows to Static simulation Technical Support (PhysicsNeMo Only)	3	1630	April 19, 2024
API Usage Error (Parameter check failed at: runtime/api/executionContext.cpp::resolveSlots::1480, condition: allInputDimensionsSpecified(routine) TensorRT tensorrt , cuda , natural-language-processing-nlp	6	11452	February 1, 2024
Could not unroll graph! Technical Support (PhysicsNeMo Only)	12	2089	January 9, 2023
Training Custom FasterRCNN resnet50 Object detection issue TAO Toolkit	9	1119	October 12, 2021
Error outputs for dynamic height and width TensorRT	8	816	November 28, 2022

Encountered torch tensor shape issue when trying to solve a transient problem

Related topics