==PROF== Connected to process 767616 (/usr/bin/python3.8) ==PROF== Profiling "vectorized_elementwise_kernel" - 0 (1/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 1 (2/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 2 (3/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 3 (4/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 4 (5/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 5 (6/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 6 (7/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 7 (8/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 8 (9/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 9 (10/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 10 (11/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 11 (12/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 12 (13/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 13 (14/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 14 (15/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 15 (16/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 16 (17/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 17 (18/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 18 (19/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 19 (20/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 20 (21/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 21 (22/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 22 (23/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 23 (24/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 24 (25/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 25 (26/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 26 (27/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 27 (28/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 28 (29/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 29 (30/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 30 (31/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 31 (32/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 32 (33/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 33 (34/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 34 (35/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 35 (36/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 36 (37/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 37 (38/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 38 (39/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 39 (40/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 40 (41/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 41 (42/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 42 (43/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 43 (44/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 44 (45/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 45 (46/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 46 (47/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 47 (48/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 48 (49/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 49 (50/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 50 (51/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 51 (52/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 52 (53/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 53 (54/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 54 (55/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 55 (56/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 56 (57/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 57 (58/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 58 (59/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 59 (60/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 60 (61/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 61 (62/300): 0%....50%....100% - 1 pass ==PROF== Profiling "distribution_elementwise_grid..." - 62 (63/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 63 (64/300): 0%....50%....100% - 1 pass ==PROF== Profiling "indexSelectLargeIndex" - 64 (65/300): 0%....50%....100% - 1 pass ==PROF== Profiling "fused_dropout_kernel_vec" - 65 (66/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 66 (67/300): 0%....50%....100% - 1 pass ==PROF== Profiling "transpose_readWrite_alignment..." - 67 (68/300): 0%....50%....100% - 1 pass ==PROF== Profiling "transpose_readWrite_alignment..." - 68 (69/300): 0%....50%....100% - 1 pass ==PROF== Profiling "transpose_readWrite_alignment..." - 69 (70/300): 0%....50%....100% - 1 pass ==PROF== Profiling "transpose_readWrite_alignment..." - 70 (71/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 71 (72/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 72 (73/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 73 (74/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 74 (75/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 75 (76/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 76 (77/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 77 (78/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 78 (79/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 79 (80/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 80 (81/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 81 (82/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 82 (83/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 83 (84/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 84 (85/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 85 (86/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 86 (87/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 87 (88/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 88 (89/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 89 (90/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 90 (91/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 91 (92/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 92 (93/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 93 (94/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 94 (95/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 95 (96/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 96 (97/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 97 (98/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 98 (99/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 99 (100/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 100 (101/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 101 (102/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 102 (103/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 103 (104/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 104 (105/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 105 (106/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 106 (107/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 107 (108/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 108 (109/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 109 (110/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 110 (111/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 111 (112/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 112 (113/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 113 (114/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 114 (115/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 115 (116/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 116 (117/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 117 (118/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 118 (119/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 119 (120/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 120 (121/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 121 (122/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 122 (123/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 123 (124/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 124 (125/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 125 (126/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 126 (127/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 127 (128/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 128 (129/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 129 (130/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 130 (131/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 131 (132/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 132 (133/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 133 (134/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 134 (135/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 135 (136/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 136 (137/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 137 (138/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 138 (139/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 139 (140/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 140 (141/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 141 (142/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 142 (143/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 143 (144/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 144 (145/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 145 (146/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 146 (147/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 147 (148/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 148 (149/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 149 (150/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 150 (151/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 151 (152/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 152 (153/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 153 (154/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 154 (155/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 155 (156/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 156 (157/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 157 (158/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 158 (159/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 159 (160/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 160 (161/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 161 (162/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 162 (163/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 163 (164/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 164 (165/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 165 (166/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 166 (167/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 167 (168/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 168 (169/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 169 (170/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 170 (171/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 171 (172/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 172 (173/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 173 (174/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 174 (175/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 175 (176/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 176 (177/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 177 (178/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 178 (179/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 179 (180/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 180 (181/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 181 (182/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 182 (183/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 183 (184/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 184 (185/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 185 (186/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 186 (187/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 187 (188/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 188 (189/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 189 (190/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 190 (191/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 191 (192/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 192 (193/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 193 (194/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 194 (195/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 195 (196/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 196 (197/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 197 (198/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 198 (199/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 199 (200/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 200 (201/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 201 (202/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 202 (203/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 203 (204/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 204 (205/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 205 (206/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 206 (207/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 207 (208/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 208 (209/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 209 (210/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 210 (211/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 211 (212/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 212 (213/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 213 (214/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 214 (215/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 215 (216/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 216 (217/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 217 (218/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 218 (219/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 219 (220/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 220 (221/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 221 (222/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 222 (223/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 223 (224/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 224 (225/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 225 (226/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 226 (227/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 227 (228/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 228 (229/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 229 (230/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 230 (231/300): 0%....50%....100% - 1 pass ==PROF== Profiling "CatArrayBatchedCopy" - 231 (232/300): 0%....50%....100% - 1 pass ==PROF== Profiling "unrolled_elementwise_kernel" - 232 (233/300): 0%....50%....100% - 1 pass ==PROF== Profiling "ampere_sgemm_32x32_sliced1x4_tn" - 233 (234/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 234 (235/300): 0%....50%....100% - 1 pass ==PROF== Profiling "indexSelectLargeIndex" - 235 (236/300): 0%....50%....100% - 1 pass ==PROF== Profiling "fused_dropout_kernel_vec" - 236 (237/300): 0%....50%....100% - 1 pass ==PROF== Profiling "unrolled_elementwise_kernel" - 237 (238/300): 0%....50%....100% - 1 pass ==PROF== Profiling "CatArrayBatchedCopy" - 238 (239/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 239 (240/300): 0%....50%....100% - 1 pass ==PROF== Profiling "unrolled_elementwise_kernel" - 240 (241/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 241 (242/300): 0%....50%....100% - 1 pass ==PROF== Profiling "reduce_kernel" - 242 (243/300): 0%....50%....100% - 1 pass ==PROF== Profiling "softmax_warp_forward" - 243 (244/300): 0%....50%....100% - 1 pass ==PROF== Profiling "gemv2N_kernel" - 244 (245/300): 0%....50%....100% - 1 pass ==PROF== Profiling "CatArrayBatchedCopy" - 245 (246/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 246 (247/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 247 (248/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 248 (249/300): 0%....50%....100% - 1 pass ==PROF== Profiling "CatArrayBatchedCopy" - 249 (250/300): 0%....50%....100% - 1 pass ==PROF== Profiling "unrolled_elementwise_kernel" - 250 (251/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 251 (252/300): 0%....50%....100% - 1 pass ==PROF== Profiling "reduce_kernel" - 252 (253/300): 0%....50%....100% - 1 pass ==PROF== Profiling "indexSelectLargeIndex" - 253 (254/300): 0%....50%....100% - 1 pass ==PROF== Profiling "fused_dropout_kernel_vec" - 254 (255/300): 0%....50%....100% - 1 pass ==PROF== Profiling "unrolled_elementwise_kernel" - 255 (256/300): 0%....50%....100% - 1 pass ==PROF== Profiling "CatArrayBatchedCopy" - 256 (257/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 257 (258/300): 0%....50%....100% - 1 pass ==PROF== Profiling "unrolled_elementwise_kernel" - 258 (259/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 259 (260/300): 0%....50%....100% - 1 pass ==PROF== Profiling "reduce_kernel" - 260 (261/300): 0%....50%....100% - 1 pass ==PROF== Profiling "softmax_warp_forward" - 261 (262/300): 0%....50%....100% - 1 pass ==PROF== Profiling "gemv2N_kernel" - 262 (263/300): 0%....50%....100% - 1 pass ==PROF== Profiling "CatArrayBatchedCopy" - 263 (264/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 264 (265/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 265 (266/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 266 (267/300): 0%....50%....100% - 1 pass ==PROF== Profiling "CatArrayBatchedCopy" - 267 (268/300): 0%....50%....100% - 1 pass ==PROF== Profiling "unrolled_elementwise_kernel" - 268 (269/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 269 (270/300): 0%....50%....100% - 1 pass ==PROF== Profiling "reduce_kernel" - 270 (271/300): 0%....50%....100% - 1 pass ==PROF== Profiling "indexSelectLargeIndex" - 271 (272/300): 0%....50%....100% - 1 pass ==PROF== Profiling "fused_dropout_kernel_vec" - 272 (273/300): 0%....50%....100% - 1 pass ==PROF== Profiling "unrolled_elementwise_kernel" - 273 (274/300): 0%....50%....100% - 1 pass ==PROF== Profiling "CatArrayBatchedCopy" - 274 (275/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 275 (276/300): 0%....50%....100% - 1 pass ==PROF== Profiling "unrolled_elementwise_kernel" - 276 (277/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 277 (278/300): 0%....50%....100% - 1 pass ==PROF== Profiling "reduce_kernel" - 278 (279/300): 0%....50%....100% - 1 pass ==PROF== Profiling "softmax_warp_forward" - 279 (280/300): 0%....50%....100% - 1 pass ==PROF== Profiling "gemv2N_kernel" - 280 (281/300): 0%....50%....100% - 1 pass ==PROF== Profiling "CatArrayBatchedCopy" - 281 (282/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 282 (283/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 283 (284/300): 0%....50%....100% - 1 pass ==PROF== Profiling "GRU_elementWise_fp" - 284 (285/300): 0%....50%....100% - 1 pass ==PROF== Profiling "CatArrayBatchedCopy" - 285 (286/300): 0%....50%....100% - 1 pass ==PROF== Profiling "unrolled_elementwise_kernel" - 286 (287/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 287 (288/300): 0%....50%....100% - 1 pass ==PROF== Profiling "reduce_kernel" - 288 (289/300): 0%....50%....100% - 1 pass ==PROF== Profiling "indexSelectLargeIndex" - 289 (290/300): 0%....50%....100% - 1 pass ==PROF== Profiling "fused_dropout_kernel_vec" - 290 (291/300): 0%....50%....100% - 1 pass ==PROF== Profiling "unrolled_elementwise_kernel" - 291 (292/300): 0%....50%....100% - 1 pass ==PROF== Profiling "CatArrayBatchedCopy" - 292 (293/300): 0%....50%....100% - 1 pass ==PROF== Profiling "Kernel" - 293 (294/300): 0%....50%....100% - 1 pass ==PROF== Profiling "unrolled_elementwise_kernel" - 294 (295/300): 0%....50%....100% - 1 pass ==PROF== Profiling "vectorized_elementwise_kernel" - 295 (296/300): 0%....50%....100% - 1 pass ==PROF== Profiling "reduce_kernel" - 296 (297/300): 0%....50%....100% - 1 pass ==PROF== Profiling "softmax_warp_forward" - 297 (298/300): 0%....50%....100% - 1 pass ==PROF== Profiling "gemv2N_kernel" - 298 (299/300): 0%....50%....100% - 1 pass ==PROF== Profiling "CatArrayBatchedCopy" - 299 (300/300): 0%....50%....100% - 1 pass ==PROF== Trying to shutdown target application ==ERROR== The application returned an error code (9). ==WARNING== Found outstanding GPU clock reset, trying to revert...Success. [767616] python3.8@127.0.0.1 void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 4,704 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 5,468 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 1,707,072 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 60,672 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 121,344 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 60,672 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 121,344 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 80,896 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 52 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 1,707,072 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 60,672 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 121,344 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 60,672 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 121,344 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 80,896 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 52 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 15,168 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 44 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 15,168 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 44 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 1,060,800 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 303,360 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 121,344 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 4,439,040 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 1,400 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 15,168 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 44 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 1,060,800 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 303,360 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 121,344 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 4,439,040 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 1,400 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 1,707,072 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 60,672 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 121,344 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 60,672 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 121,344 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 80,896 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 52 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 15,168 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 44 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 1,060,800 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 303,360 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 121,344 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 68 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::distribution_elementwise_grid_stride_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::::distribution_nullary_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, T5)::[lambda(curandStatePhilox4_32_10 *) (instance 2)], void at::native::templates::cuda::normal_kernel(at::Tensor &, double, double, T1)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)]>(at::TensorIterator &, T4, const T5 &, T6)::[lambda(int, float) (instance 1)]>(int, at::PhiloxCudaState, T3, T4), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 4,439,040 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 1,400 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::indexSelectLargeIndex(at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, int, int, T3, T3, long), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 448,512 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::fused_dropout_kernel_vec(at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, T3, T2, at::PhiloxCudaState), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 364,096 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::FillFunctor, at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:51, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 2,048 ---------------------------------------------------------------------- --------------- ------------------------------ void transpose_readWrite_alignment_kernel(cublasTransposeParams, const T1 *, T1 *, const T2 *), 2023-Apr-06 16:55:51, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 7,176 ---------------------------------------------------------------------- --------------- ------------------------------ void transpose_readWrite_alignment_kernel(cublasTransposeParams, const T1 *, T1 *, const T2 *), 2023-Apr-06 16:55:51, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 10,368 ---------------------------------------------------------------------- --------------- ------------------------------ void transpose_readWrite_alignment_kernel(cublasTransposeParams, const T1 *, T1 *, const T2 *), 2023-Apr-06 16:55:51, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 7,176 ---------------------------------------------------------------------- --------------- ------------------------------ void transpose_readWrite_alignment_kernel(cublasTransposeParams, const T1 *, T1 *, const T2 *), 2023-Apr-06 16:55:51, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 10,368 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:51, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:51, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:51, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:51, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:51, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:51, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 25 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 27 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,824 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 26 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 33,016 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:52, Context 1, Stream 28 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::CatArrayBatchedCopy(T1 *, at::native::::CatArrInputTensorMetadata, at::native::::TensorSizeStride, int, T2), 2023-Apr-06 16:55:52, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 69,120 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::unrolled_elementwise_kernel, OffsetCalculator<(int)1, unsigned int>, OffsetCalculator<(int)1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, T1, T2, T3, T4, T5, T6), 2023-Apr-06 16:55:52, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 15,360 ---------------------------------------------------------------------- --------------- ------------------------------ ampere_sgemm_32x32_sliced1x4_tn, 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 66,720 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::tanh_kernel_cuda(at::TensorIterator &)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)], at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 4,096 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::indexSelectLargeIndex(at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, int, int, T3, T3, long), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 14,976 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::fused_dropout_kernel_vec(at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, T3, T2, at::PhiloxCudaState), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 13,120 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::unrolled_elementwise_kernel, OffsetCalculator<(int)1, unsigned int>, OffsetCalculator<(int)1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, T1, T2, T3, T4, T5, T6), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 614,400 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::CatArrayBatchedCopy(T1 *, at::native::::CatArrInputTensorMetadata, at::native::::TensorSizeStride, int, T2), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 2,188,032 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 609,024 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::unrolled_elementwise_kernel, at::detail::Array, OffsetCalculator<(int)2, unsigned int>, OffsetCalculator<(int)1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, T1, T2, T3, T4, T5, T6), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 67,328 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::tanh_kernel_cuda(at::TensorIterator &)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)], at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 16,384 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::reduce_kernel<(int)512, (int)1, at::native::ReduceOp::operator ()(at::TensorIterator &)::[lambda(float, float) (instance 1)]>, unsigned int, float, (int)4>>(T3), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 143,360 ---------------------------------------------------------------------- --------------- ------------------------------ void ::softmax_warp_forward(T2 *, const T1 *, int, int, int), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 9,728 ---------------------------------------------------------------------- --------------- ------------------------------ void gemv2N_kernel, cublasGemvTensorStridedBatched, cublasGemvTensorStridedBatched, float>>(T13), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 727,040 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::CatArrayBatchedCopy(T1 *, at::native::::CatArrInputTensorMetadata, at::native::::TensorSizeStride, int, T2), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 91,520 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:53, Context 1, Stream 29 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,280 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:53, Context 1, Stream 30 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 31,408 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:53, Context 1, Stream 30 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::CatArrayBatchedCopy(T1 *, at::native::::CatArrInputTensorMetadata, at::native::::TensorSizeStride, int, T2), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 110,080 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::unrolled_elementwise_kernel, OffsetCalculator<(int)1, unsigned int>, OffsetCalculator<(int)1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, T1, T2, T3, T4, T5, T6), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 2,601,120 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 2,342,416 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::reduce_kernel<(int)512, (int)1, at::native::ReduceOp, unsigned int, float, (int)4>>(T3), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 1,571,904 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::indexSelectLargeIndex(at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, int, int, T3, T3, long), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 14,976 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::fused_dropout_kernel_vec(at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, T3, T2, at::PhiloxCudaState), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 13,120 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::unrolled_elementwise_kernel, OffsetCalculator<(int)1, unsigned int>, OffsetCalculator<(int)1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, T1, T2, T3, T4, T5, T6), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 614,400 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::CatArrayBatchedCopy(T1 *, at::native::::CatArrInputTensorMetadata, at::native::::TensorSizeStride, int, T2), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 2,188,032 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 609,024 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::unrolled_elementwise_kernel, at::detail::Array, OffsetCalculator<(int)2, unsigned int>, OffsetCalculator<(int)1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, T1, T2, T3, T4, T5, T6), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 67,328 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::tanh_kernel_cuda(at::TensorIterator &)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)], at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 16,384 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::reduce_kernel<(int)512, (int)1, at::native::ReduceOp::operator ()(at::TensorIterator &)::[lambda(float, float) (instance 1)]>, unsigned int, float, (int)4>>(T3), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 143,360 ---------------------------------------------------------------------- --------------- ------------------------------ void ::softmax_warp_forward(T2 *, const T1 *, int, int, int), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 9,728 ---------------------------------------------------------------------- --------------- ------------------------------ void gemv2N_kernel, cublasGemvTensorStridedBatched, cublasGemvTensorStridedBatched, float>>(T13), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 727,040 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::CatArrayBatchedCopy(T1 *, at::native::::CatArrInputTensorMetadata, at::native::::TensorSizeStride, int, T2), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 91,520 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:53, Context 1, Stream 31 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,280 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:53, Context 1, Stream 32 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 31,408 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:53, Context 1, Stream 32 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::CatArrayBatchedCopy(T1 *, at::native::::CatArrInputTensorMetadata, at::native::::TensorSizeStride, int, T2), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 110,080 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::unrolled_elementwise_kernel, OffsetCalculator<(int)1, unsigned int>, OffsetCalculator<(int)1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, T1, T2, T3, T4, T5, T6), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 2,601,120 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 2,342,416 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::reduce_kernel<(int)512, (int)1, at::native::ReduceOp, unsigned int, float, (int)4>>(T3), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 1,571,904 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::indexSelectLargeIndex(at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, int, int, T3, T3, long), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 14,976 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::fused_dropout_kernel_vec(at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, T3, T2, at::PhiloxCudaState), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 13,120 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::unrolled_elementwise_kernel, OffsetCalculator<(int)1, unsigned int>, OffsetCalculator<(int)1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, T1, T2, T3, T4, T5, T6), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 614,400 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::CatArrayBatchedCopy(T1 *, at::native::::CatArrInputTensorMetadata, at::native::::TensorSizeStride, int, T2), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 2,188,032 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 609,024 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::unrolled_elementwise_kernel, at::detail::Array, OffsetCalculator<(int)2, unsigned int>, OffsetCalculator<(int)1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, T1, T2, T3, T4, T5, T6), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 67,328 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::tanh_kernel_cuda(at::TensorIterator &)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)], at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 16,384 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::reduce_kernel<(int)512, (int)1, at::native::ReduceOp::operator ()(at::TensorIterator &)::[lambda(float, float) (instance 1)]>, unsigned int, float, (int)4>>(T3), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 143,360 ---------------------------------------------------------------------- --------------- ------------------------------ void ::softmax_warp_forward(T2 *, const T1 *, int, int, int), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 9,728 ---------------------------------------------------------------------- --------------- ------------------------------ void gemv2N_kernel, cublasGemvTensorStridedBatched, cublasGemvTensorStridedBatched, float>>(T13), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 727,040 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::CatArrayBatchedCopy(T1 *, at::native::::CatArrInputTensorMetadata, at::native::::TensorSizeStride, int, T2), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 91,520 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:53, Context 1, Stream 33 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 51,280 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:53, Context 1, Stream 34 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 31,408 ---------------------------------------------------------------------- --------------- ------------------------------ void GRU_elementWise_fp(int, int, int, int, const T1 *, const T1 *, const T1 *, const T1 *, cudnn::reduced_divisor, T1 *, const T2 *, T2 *, T1 *, bool, bool, int), 2023-Apr-06 16:55:53, Context 1, Stream 34 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 36,352 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::CatArrayBatchedCopy(T1 *, at::native::::CatArrInputTensorMetadata, at::native::::TensorSizeStride, int, T2), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 110,080 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::unrolled_elementwise_kernel, OffsetCalculator<(int)1, unsigned int>, OffsetCalculator<(int)1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, T1, T2, T3, T4, T5, T6), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 2,601,120 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 2,342,416 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::reduce_kernel<(int)512, (int)1, at::native::ReduceOp, unsigned int, float, (int)4>>(T3), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 1,571,904 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::indexSelectLargeIndex(at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, int, int, T3, T3, long), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 14,976 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::fused_dropout_kernel_vec(at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, at::cuda::detail::TensorInfo, T3, T2, at::PhiloxCudaState), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 13,120 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::unrolled_elementwise_kernel, OffsetCalculator<(int)1, unsigned int>, OffsetCalculator<(int)1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, T1, T2, T3, T4, T5, T6), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 614,400 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::CatArrayBatchedCopy(T1 *, at::native::::CatArrInputTensorMetadata, at::native::::TensorSizeStride, int, T2), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 2,188,032 ---------------------------------------------------------------------- --------------- ------------------------------ void cutlass::Kernel(T1::Params), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 609,024 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::unrolled_elementwise_kernel, at::detail::Array, OffsetCalculator<(int)2, unsigned int>, OffsetCalculator<(int)1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, T1, T2, T3, T4, T5, T6), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 67,328 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::vectorized_elementwise_kernel<(int)4, at::native::tanh_kernel_cuda(at::TensorIterator &)::[lambda() (instance 1)]::operator ()() const::[lambda() (instance 4)]::operator ()() const::[lambda(float) (instance 1)], at::detail::Array>(int, T2, T3), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 16,384 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::reduce_kernel<(int)512, (int)1, at::native::ReduceOp::operator ()(at::TensorIterator &)::[lambda(float, float) (instance 1)]>, unsigned int, float, (int)4>>(T3), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 143,360 ---------------------------------------------------------------------- --------------- ------------------------------ void ::softmax_warp_forward(T2 *, const T1 *, int, int, int), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 9,728 ---------------------------------------------------------------------- --------------- ------------------------------ void gemv2N_kernel, cublasGemvTensorStridedBatched, cublasGemvTensorStridedBatched, float>>(T13), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 727,040 ---------------------------------------------------------------------- --------------- ------------------------------ void at::native::::CatArrayBatchedCopy(T1 *, at::native::::CatArrInputTensorMetadata, at::native::::TensorSizeStride, int, T2), 2023-Apr-06 16:55:53, Context 1, Stream 7 Section: Command line profiler metrics ---------------------------------------------------------------------- --------------- ------------------------------ smsp__inst_executed.sum inst 91,520 ---------------------------------------------------------------------- --------------- ------------------------------