| A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | RAW DATA | ||||||||||||||||
2 | 89364550 | Compiling | xla/service/gpu/runtime/topk_kernel.cu.cc | [for | host] | ||||||||||||
3 | 89296475 | Compiling | xla/service/gpu/runtime/topk_kernel.cu.cc | Group | Time (min) | XLA | Time (min) | LLVM | Time (min) | MLIR | Time (min) | ||||||
4 | 89167697 | Compiling | xla/service/gpu/runtime/topk_kernel.cu.cc | xla | 434.68 | xla/service | 205.31 | llvm/lib/Target | 110.96 | mlir/lib/Conversion | 20.96 | ||||||
5 | 76184087 | Compiling | xla/service/cpu/runtime_matmul.cc | llvm | 274.52 | xla/service/gpu | 87.28 | llvm/lib/CodeGen | 48.07 | mlir/lib/Dialect/Linalg | 12.48 | ||||||
6 | 74802937 | Compiling | xla/service/cpu/runtime_matmul.cc | [for | host] | mlir | 122.92 | xla/tests | 82.20 | llvm/lib/Transforms | 46.22 | mlir/lib/Dialect/LLVMIR | 5.94 | ||||
7 | 73283787 | Compiling | xla/service/cpu/runtime_matmul.cc | external/nccl_archive | 113.30 | xla/mlir | 57.29 | llvm/lib/Analysis | 14.47 | mlir/lib/Target | 5.86 | ||||||
8 | 61871248 | Compiling | xla/service/algebraic_simplifier_test.cc | src | 72.32 | xla/mlir_hlo | 43.57 | llvm/lib/ExecutionEngine | 11.09 | mlir/lib/Dialect/SparseTensor | 5.69 | ||||||
9 | 56213245 | Compiling | xla/mlir_hlo/mhlo/IR/hlo_ops.cc | [for | host] | lib | 12.49 | xla/tests/fuzz | 35.52 | llvm/lib/DebugInfo | 8.73 | mlir/lib/Dialect/SPIRV | 5.64 | ||||
10 | 55686364 | Compiling | mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | [for | host] | tsl | 7.23 | xla/service/cpu | 26.35 | llvm/lib/IR | 7.46 | mlir/lib/IR | 5.26 | ||||
11 | 55528704 | Compiling | xla/mlir_hlo/mhlo/IR/hlo_ops.cc | stablehlo | 6.48 | xla/mlir_hlo/mhlo | 18.74 | llvm/lib/Support | 7.26 | mlir/lib/Target/LLVMIR | 5.08 | ||||||
12 | 54699917 | Compiling | mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | absl | 2.22 | xla/python | 17.95 | llvm/lib/MC | 5.02 | mlir/lib/Dialect/Vector | 4.90 | ||||||
13 | 53644369 | Compiling | mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | ortools | 1.80 | xla/hlo | 14.47 | llvm/lib/Object | 2.77 | mlir/lib/Dialect/Affine | 4.76 | ||||||
14 | 53400281 | Compiling | xla/mlir_hlo/mhlo/IR/hlo_ops.cc | external/tsl | 1.05 | xla/client | 11.05 | llvm/lib/Passes | 2.33 | mlir/lib/Dialect/SCF | 3.35 | ||||||
15 | 44256122 | Compiling | xla/service/spmd/spmd_partitioner_test.cc | xla/pjrt | 10.62 | llvm/utils/TableGen | 2.33 | mlir/lib/Dialect/Tensor | 3.01 | ||||||||
16 | 43724572 | Compiling | stablehlo/dialect/StablehloOps.cpp | xla/stream_executor | 8.91 | llvm/lib/ProfileData | 1.78 | mlir/lib/Transforms | 2.99 | ||||||||
17 | 41387165 | Compiling | stablehlo/dialect/StablehloOps.cpp | [for | host] | xla/mlir_hlo/gml_st | 8.34 | llvm/lib/Bitcode | 1.59 | mlir/lib/Dialect/MemRef | 2.96 | ||||||
18 | 40005478 | Compiling | llvm/lib/Target/X86/X86ISelLowering.cpp | xla/client/lib | 8.01 | mlir/lib/Dialect/Tosa | 2.89 | ||||||||||
19 | 39734014 | Compiling | stablehlo/dialect/StablehloOps.cpp | xla/translate | 7.89 | mlir/lib/Dialect/Arith | 2.85 | ||||||||||
20 | 38920236 | Compiling | llvm/lib/Target/X86/X86ISelLowering.cpp | xla/hlo/evaluator | 6.99 | Targets | mlir/lib/Dialect/Bufferization | 2.81 | |||||||||
21 | 38200195 | Compiling | mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp | xla/mlir_hlo/tools | 5.71 | llvm/lib/Target/AMDGPU | 31.57 | mlir/tools/mlir-tblgen | 2.37 | ||||||||
22 | 37996425 | Compiling | mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp | [for | host] | xla/service/spmd | 5.46 | llvm/lib/Target/X86 | 19.32 | mlir/lib/Analysis | 2.05 | ||||||
23 | 37787987 | Compiling | external/nccl_archive/src/collectives/device/premulsum_f64_all_reduce.cu.cc | xla/tools | 5.42 | llvm/lib/Target/AArch64 | 17.20 | mlir/lib/Dialect/GPU | 2.04 | ||||||||
24 | 37759444 | Compiling | external/nccl_archive/src/collectives/device/premulsum_f64_all_reduce.cu.cc | [for | host] | xla/mlir/backends | 4.97 | llvm/lib/Target/ARM | 13.76 | mlir/lib/Dialect/Transform | 1.66 | ||||||
25 | 37150554 | Compiling | external/nccl_archive/src/collectives/device/premulsum_f64_all_reduce.cu.cc | xla/mlir/runtime | 4.57 | llvm/lib/Target/PowerPC | 10.03 | mlir/lib/Dialect/Shape | 1.55 | ||||||||
26 | 37089689 | Compiling | external/nccl_archive/src/collectives/device/premulsum_f64_all_reduce.cu.cc | [for | host] | xla/python/ifrt | 4.48 | llvm/lib/Target/RISCV | 8.75 | mlir/lib/ExecutionEngine | 1.43 | ||||||
27 | 36986878 | Compiling | llvm/lib/Target/X86/X86ISelLowering.cpp | [for | host] | xla/service/llvm_ir | 4.12 | llvm/lib/Target/SystemZ | 4.91 | mlir/lib/Interfaces | 1.40 | ||||||
28 | 36571676 | Compiling | mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp | xla/hlo/ir | 4.08 | llvm/lib/Target/NVPTX | 4.59 | mlir/lib/Dialect/Math | 1.29 | ||||||||
29 | 34594476 | Compiling | mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp | xla/runtime | 3.69 | mlir/lib/Transforms/Utils | 1.28 | ||||||||||
30 | 34082074 | Compiling | mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp | [for | host] | xla/mlir_hlo/lhlo | 3.28 | mlir/lib/Dialect/Async | 1.26 | ||||||||
31 | 32750273 | Compiling | mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp | xla/translate/hlo_to_mhlo | 3.05 | mlir/lib/AsmParser | 1.23 | ||||||||||
32 | 32423820 | Compiling | xla/service/sharding_propagation_test.cc | xla/mlir_hlo/transforms | 2.97 | mlir/lib/Conversion/TosaToLinalg | 1.21 | ||||||||||
33 | 32420841 | Compiling | xla/mlir_hlo/mhlo/transforms/legalize_to_linalg/legalize_to_linalg.cc | xla/mlir/tools | 2.74 | mlir/lib/Conversion/SPIRVToLLVM | 1.15 | ||||||||||
34 | 32363737 | Compiling | xla/service/gpu/ir_emitter_unnested.cc | xla/stream_executor/tpu | 2.62 | mlir/lib/Conversion/LLVMCommon | 1.09 | ||||||||||
35 | 32318548 | Compiling | xla/service/gpu/ir_emitter_unnested.cc | [for | host] | xla/translate/mhlo_to_lhlo_with_xla | 2.52 | mlir/lib/Dialect/PDL | 1.02 | ||||||||
36 | 32001800 | Compiling | xla/mlir_hlo/mhlo/transforms/legalize_to_linalg/legalize_to_linalg.cc | xla/stream_executor/cuda | 2.43 | ||||||||||||
37 | 31659706 | Compiling | xla/mlir_hlo/mhlo/transforms/legalize_to_linalg/legalize_to_linalg.cc | [for | host] | xla/backends | 2.14 | ||||||||||
38 | 31179380 | Compiling | xla/service/gpu/ir_emitter_unnested.cc | xla/translate/mhlo_to_hlo | 2.11 | ||||||||||||
39 | 31147758 | Compiling | src/cpu/rnn/ref_rnn.cpp | xla/tests/dot_operation_test.cc | 1.89 | NCCL | |||||||||||
40 | 30615886 | Compiling | xla/service/gpu/cudnn_fused_conv_rewriter_test.cc | xla/python/pjrt_ifrt | 1.69 | external/nccl_archive/src/collectives/device/premulsum_f64_all_reduce.cu.cc | 2.50 | ||||||||||
41 | 30284146 | Compiling | src/cpu/rnn/ref_rnn.cpp | xla/hlo/experimental | 1.63 | external/nccl_archive/src/collectives/device/sum_f64_all_reduce.cu.cc | 1.84 | ||||||||||
42 | 29639403 | Compiling | src/cpu/rnn/ref_rnn.cpp | [for | host] | xla/mlir_hlo/deallocation | 1.54 | external/nccl_archive/src/collectives/device/prod_f64_all_reduce.cu.cc | 1.77 | ||||||||
43 | 29078402 | Compiling | mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp | xla/python/tpu_driver | 1.51 | external/nccl_archive/src/collectives/device/max_f64_all_reduce.cu.cc | 1.76 | ||||||||||
44 | 29057492 | Compiling | mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp | [for | host] | xla/hlo/utils | 1.37 | external/nccl_archive/src/collectives/device/min_f64_all_reduce.cu.cc | 1.73 | ||||||||
45 | 28459386 | Compiling | xla/service/cpu/cpu_compiler.cc | xla/tests/convolution_test.cc | 1.36 | external/nccl_archive/src/collectives/device/premulsum_i8_all_reduce.cu.cc | 1.20 | ||||||||||
46 | 28180872 | Compiling | external/nccl_archive/src/collectives/device/sum_f64_all_reduce.cu.cc | xla/rpc | 1.31 | external/nccl_archive/src/collectives/device/premulsum_f16_all_reduce.cu.cc | 1.20 | ||||||||||
47 | 28046011 | Compiling | mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp | xla/pjrt/c | 1.22 | external/nccl_archive/src/collectives/device/sumpostdiv_i8_all_reduce.cu.cc | 1.18 | ||||||||||
48 | 27887512 | Compiling | external/nccl_archive/src/collectives/device/sum_f64_all_reduce.cu.cc | [for | host] | xla/tools/multihost_hlo_runner | 1.19 | external/nccl_archive/src/collectives/device/min_f16_all_reduce.cu.cc | 1.16 | ||||||||
49 | 27750179 | Compiling | external/nccl_archive/src/collectives/device/sum_f64_all_reduce.cu.cc | xla/pjrt/gpu | 1.15 | external/nccl_archive/src/collectives/device/premulsum_u8_all_reduce.cu.cc | 1.16 | ||||||||||
50 | 27642972 | Compiling | xla/service/cpu/cpu_compiler.cc | [for | host] | xla/service/xla_compile | 1.11 | external/nccl_archive/src/collectives/device/sumpostdiv_u8_all_reduce.cu.cc | 1.16 | ||||||||
51 | 27594781 | Compiling | xla/service/gpu/gpu_compiler.cc | xla/backends/interpreter | 1.09 | external/nccl_archive/src/collectives/device/max_f16_all_reduce.cu.cc | 1.16 | ||||||||||
52 | 27490142 | Compiling | xla/service/gpu/tests/gemm_rewrite_test.cc | xla/service/algebraic_simplifier.cc | 1.06 | external/nccl_archive/src/collectives/device/sum_f16_all_reduce.cu.cc | 1.15 | ||||||||||
53 | 27357566 | Compiling | src/cpu/cpu_rnn_list.cpp | [for | host] | xla/backends/profiler | 1.05 | external/nccl_archive/src/collectives/device/prod_u8_all_reduce.cu.cc | 1.10 | ||||||||
54 | 27236189 | Compiling | xla/service/gpu/gpu_compiler.cc | [for | host] | xla/service/algebraic_simplifier_test | 1.05 | external/nccl_archive/src/collectives/device/prod_f16_all_reduce.cu.cc | 1.10 | ||||||||
55 | 26921474 | Compiling | external/nccl_archive/src/collectives/device/prod_f64_all_reduce.cu.cc | [for | host] | xla/service/algebraic_simplifier_test.cc | 1.03 | external/nccl_archive/src/collectives/device/min_i8_all_reduce.cu.cc | 1.07 | ||||||||
56 | 26897649 | Compiling | external/nccl_archive/src/collectives/device/prod_f64_all_reduce.cu.cc | xla/tests/reduce_window_test.cc | 1.02 | external/nccl_archive/src/collectives/device/max_u8_all_reduce.cu.cc | 1.07 | ||||||||||
57 | 26860833 | Compiling | external/nccl_archive/src/collectives/device/max_f64_all_reduce.cu.cc | external/nccl_archive/src/collectives/device/max_i8_all_reduce.cu.cc | 1.07 | ||||||||||||
58 | 26805559 | Compiling | xla/service/cpu/cpu_compiler.cc | external/nccl_archive/src/collectives/device/min_u8_all_reduce.cu.cc | 1.06 | ||||||||||||
59 | 26736477 | Compiling | external/nccl_archive/src/collectives/device/sum_f64_all_reduce.cu.cc | [for | host] | external/nccl_archive/src/collectives/device/sum_i8_all_reduce.cu.cc | 1.06 | ||||||||||
60 | 26722747 | Compiling | xla/service/gpu/gpu_compiler.cc | external/nccl_archive/src/collectives/device/prod_i8_all_reduce.cu.cc | 1.05 | ||||||||||||
61 | 26622259 | Compiling | external/nccl_archive/src/collectives/device/max_f64_all_reduce.cu.cc | [for | host] | external/nccl_archive/src/collectives/device/sum_u8_all_reduce.cu.cc | 1.05 | ||||||||||
62 | 26512450 | Compiling | llvm/lib/Passes/PassBuilder.cpp | [for | host] | ||||||||||||
63 | 26445425 | Compiling | src/cpu/cpu_rnn_list.cpp | ||||||||||||||
64 | 26300239 | Compiling | external/nccl_archive/src/collectives/device/min_f64_all_reduce.cu.cc | ||||||||||||||
65 | 26253554 | Compiling | external/nccl_archive/src/collectives/device/max_f64_all_reduce.cu.cc | ||||||||||||||
66 | 26172374 | Compiling | src/cpu/cpu_rnn_list.cpp | ||||||||||||||
67 | 26100197 | Compiling | external/nccl_archive/src/collectives/device/max_f64_all_reduce.cu.cc | [for | host] | ||||||||||||
68 | 26074391 | Compiling | external/nccl_archive/src/collectives/device/prod_f64_all_reduce.cu.cc | [for | host] | ||||||||||||
69 | 26031458 | Compiling | external/nccl_archive/src/collectives/device/prod_f64_all_reduce.cu.cc | ||||||||||||||
70 | 25883848 | Compiling | external/nccl_archive/src/collectives/device/min_f64_all_reduce.cu.cc | [for | host] | ||||||||||||
71 | 25834779 | Compiling | external/nccl_archive/src/collectives/device/min_f64_all_reduce.cu.cc | [for | host] | ||||||||||||
72 | 25817324 | Compiling | llvm/lib/Passes/PassBuilder.cpp | ||||||||||||||
73 | 25779318 | Compiling | llvm/lib/Passes/PassBuilder.cpp | ||||||||||||||
74 | 25677642 | Compiling | external/nccl_archive/src/collectives/device/min_f64_all_reduce.cu.cc | ||||||||||||||
75 | 25562147 | Compiling | xla/python/xla.cc | ||||||||||||||
76 | 25487790 | Compiling | stablehlo/dialect/VhloOps.cpp | ||||||||||||||
77 | 25027823 | Compiling | xla/service/memory_space_assignment_test.cc | ||||||||||||||
78 | 24918399 | Compiling | stablehlo/dialect/VhloOps.cpp | ||||||||||||||
79 | 24819981 | Compiling | stablehlo/dialect/VhloOps.cpp | [for | host] | ||||||||||||
80 | 24638966 | Compiling | xla/mlir_hlo/lhlo/IR/lhlo_ops.cc | [for | host] | ||||||||||||
81 | 24212680 | Compiling | xla/translate/mhlo_to_lhlo_with_xla/xla_translate_opt_main.cc | ||||||||||||||
82 | 23883696 | Compiling | xla/mlir_hlo/lhlo/IR/lhlo_ops.cc | ||||||||||||||
83 | 23739428 | Compiling | xla/mlir_hlo/lhlo/IR/lhlo_ops.cc | ||||||||||||||
84 | 23523715 | Compiling | xla/service/gpu/runtime/conv.cc | ||||||||||||||
85 | 23480362 | Compiling | xla/service/gpu/runtime/conv.cc | [for | host] | ||||||||||||
86 | 23440893 | Compiling | xla/tests/dot_operation_test.cc | ||||||||||||||
87 | 23408672 | Compiling | xla/hlo/evaluator/hlo_evaluator_test.cc | ||||||||||||||
88 | 23250388 | Compiling | xla/service/gpu/gpu_executable.cc | [for | host] | ||||||||||||
89 | 23233132 | Compiling | xla/service/gpu/runtime/conv.cc | ||||||||||||||
90 | 23124497 | Compiling | xla/mlir/tools/mlir_bisect/mlir_bisect.cc | ||||||||||||||
91 | 23083056 | Compiling | xla/tests/dot_operation_test.cc | ||||||||||||||
92 | 23003246 | Compiling | xla/service/gpu/nvptx_compiler_test.cc | ||||||||||||||
93 | 22939255 | Compiling | xla/translate/mhlo_to_lhlo_with_xla/xla_translate_opt_main.cc | ||||||||||||||
94 | 22859750 | Compiling | xla/runtime/custom_call_test.cc | ||||||||||||||
95 | 22643235 | Compiling | xla/tests/llvm_compiler_test.cc | ||||||||||||||
96 | 22631760 | Compiling | xla/tests/dot_operation_test.cc | ||||||||||||||
97 | 22621223 | Compiling | xla/service/gpu/tests/mlir_gpu_test_base.cc | ||||||||||||||
98 | 22562788 | Compiling | xla/tests/dot_operation_test.cc | ||||||||||||||
99 | 22533221 | Compiling | mlir/lib/Dialect/Vector/IR/VectorOps.cpp | ||||||||||||||
100 | 22295208 | Compiling | mlir/lib/Dialect/Vector/IR/VectorOps.cpp | [for | host] | ||||||||||||