Compare commits
306 Commits
pytorch-rocm
..
main
| Author | SHA1 | Date | |
|---|---|---|---|
| af542a7f67 | |||
| 1434b62836 | |||
| 13b49f5327 | |||
| 07fe2c2e72 | |||
| 0b3f7ba0d4 | |||
| 9cf4295c81 | |||
| e81e6e81fc | |||
| 61a6e27950 | |||
| 679682ade6 | |||
| 4aa6ec5583 | |||
| b3e30b7caa | |||
| 0d111767d8 | |||
| 6d3fee3cd1 | |||
| adecb03429 | |||
| bdabb97a97 | |||
| 7a28595e50 | |||
| b42f42099f | |||
| 27ecec9dc0 | |||
| a23e6838de | |||
| 2f1628b890 | |||
| 6e345911b2 | |||
| 07774ffb15 | |||
| 74fc6e16d4 | |||
| c947b0e310 | |||
| 1e02dd5782 | |||
| 3b28b45dc2 | |||
| e8fcc2382b | |||
| 6b0480786d | |||
| 5823e62b6e | |||
| 2ac35cabf1 | |||
| 054470a536 | |||
| edcca2f203 | |||
| a04dec285c | |||
| 7517e1a790 | |||
| ca26d6d3f1 | |||
| 9e24eafa24 | |||
| 522c37e248 | |||
| dfbfb5f1c8 | |||
| f973757713 | |||
| 267edeb9ce | |||
| cf8e376b2a | |||
| 986ac3f9e4 | |||
| 55bee7a159 | |||
| ce6ffadda6 | |||
| 531c906d58 | |||
| 8211251e2f | |||
| 94e53fa152 | |||
| 511090debd | |||
| b278fa2fb0 | |||
| 4c1605ba73 | |||
| 7137ca3df2 | |||
| a318bc7bd0 | |||
| 0dfea5f5ce | |||
| d0e6019959 | |||
| 14c692ec72 | |||
| 23c7e15505 | |||
| e12a16239b | |||
| 6d4f36a4c6 | |||
| 7a8720242c | |||
| 889f532249 | |||
| 969dc270ff | |||
| a33fc0de15 | |||
| 4ef07cd255 | |||
| ee4a432f4f | |||
| c7a7aa0f43 | |||
| 921d1f5d6b | |||
| dcd8a79008 | |||
| a1225f9a53 | |||
| 186a95a2d6 | |||
| 2e27a8dfff | |||
| 089e273a59 | |||
| 63c2f071b4 | |||
| aad046e6f0 | |||
| 67ed132552 | |||
| f28a94aca9 | |||
| 52c35e25cf | |||
| e677d70c3a | |||
| 93f0485f92 | |||
| 1726410184 | |||
| f577a21b37 | |||
| b25727666d | |||
| 6ca7689012 | |||
| ed9f4f6afc | |||
| 45b8c3bfdc | |||
| ddf386b754 | |||
| 8cba90661c | |||
| a4c50846a5 | |||
| f6e62fab32 | |||
| bdbd4e9972 | |||
| 9bd3c11596 | |||
| e58ef33461 | |||
| a6912e8806 | |||
| d780788f9d | |||
| 90902bf573 | |||
| b185400a3d | |||
| fd94a9e6a9 | |||
| 438e34b9e7 | |||
| f038bcd3d2 | |||
| 25befba232 | |||
| 74205a50f4 | |||
| 0a77959ccc | |||
| 034e581379 | |||
| cf29fb687b | |||
| 4480eddaed | |||
| 21f537366b | |||
| 6fda8a1adb | |||
| d72edd1c5b | |||
| 15956b129b | |||
| 8b1f7b2fe5 | |||
| 221ee2744a | |||
| 2c72efe66f | |||
| 8a7af5653f | |||
| 376d10f7e1 | |||
| 8262896111 | |||
| 502435c5e2 | |||
| c41e1002d8 | |||
| 15b1014b36 | |||
| a61f56b7fa | |||
| 32d8d69ce2 | |||
| 572956d105 | |||
| 235371a524 | |||
| 6f94f69c0d | |||
| ad93398e01 | |||
| c23f8a1aae | |||
| ab292af57f | |||
| 8400712c23 | |||
| c7680921b3 | |||
| d9c05758d6 | |||
| e619019452 | |||
| 6aaf749822 | |||
| e6a07b5769 | |||
| d1b8be76a1 | |||
| e36696889a | |||
| 4d549c6061 | |||
| b7eaca59be | |||
| c7fde28d0d | |||
| a5aad5e577 | |||
| 246dc9b70b | |||
| c66f59a13b | |||
| 42e4f4af12 | |||
| e5f6584bdf | |||
| 792db02eec | |||
| dba1b00ecc | |||
| deb0c24a12 | |||
| 46088a35d2 | |||
| e8e883a1ad | |||
| bfb407073d | |||
| a0edeb49d7 | |||
| e09773bb8d | |||
| 886f23a6bf | |||
| 8d5ea9fd15 | |||
| bb56dc5180 | |||
| 29c193b86f | |||
| 820fcfc4d8 | |||
| 664ebe92dc | |||
| 8ec7b96b76 | |||
| 59a058073b | |||
| 38e8a7203b | |||
| 2939420f71 | |||
| ee1b10fd83 | |||
| 801463d31c | |||
| 51104ca5ab | |||
| a91dddfb45 | |||
| 83ef4a074a | |||
| 1f83cd1682 | |||
| f059e086d9 | |||
| 299e55ddb3 | |||
| f8784f3b5f | |||
| e4f1ca5192 | |||
| 5172319ca9 | |||
| a9f5c4e5c4 | |||
| b32c185b4e | |||
| bc2eff50dc | |||
| 5ab6177d17 | |||
| a0ad358fc2 | |||
| c94043db39 | |||
| 5700573777 | |||
| cc5726f97d | |||
| c908c4b131 | |||
| 37454a3766 | |||
| 12fea032bf | |||
| e085da0100 | |||
| ebe7791e27 | |||
| 526ce0484a | |||
| ae4dbaea3b | |||
| 03ea023b51 | |||
| 7fe36b1888 | |||
| f1e1bc7ae7 | |||
| 5233271e4f | |||
| 995a55cf18 | |||
| 3fa7e1440d | |||
| 5e74799f7e | |||
| 2124c5fcb3 | |||
| b3861a8539 | |||
| c657d723ad | |||
| 014d308db9 | |||
| 3aeb362d18 | |||
| 0b53b286dc | |||
| 84612f5bf4 | |||
| e55b5c7a82 | |||
| bda75b0ec0 | |||
| eab1e97471 | |||
| f10171edfb | |||
| ee34ee9526 | |||
| 11f7be8eba | |||
| 7fab23cca4 | |||
| 58dc6a5166 | |||
| 040e4edf9d | |||
| fd21b6593a | |||
| d1ac468a8f | |||
| a23c081194 | |||
| 8d71034ecf | |||
| 63d4edcbb7 | |||
| af5cc788aa | |||
| 3b331cbbf2 | |||
| 25007fbfb9 | |||
| 50860b5898 | |||
| f8a9ad5b05 | |||
| 29650623e6 | |||
| 498bbf2fda | |||
| 28c476889c | |||
| 8b985a8e40 | |||
| 4b9cbcfb7f | |||
| 066157025b | |||
| 12baacca69 | |||
| 9b306cd207 | |||
| 6798402182 | |||
| de0d19bb50 | |||
| b430f90f3b | |||
| 45f4da568c | |||
| 32f381bf61 | |||
| eecf6e8321 | |||
| 1ba033ec4c | |||
| ab1a0ae614 | |||
| e8653e7f96 | |||
| ee000e8ac3 | |||
| 96b0aa7355 | |||
| a1abe8ec3b | |||
| 526bdecf36 | |||
| e1de2aa0d3 | |||
| 7b8a95b48c | |||
| 4c6f368f97 | |||
| f77a39d2b2 | |||
| aadfe3d4d0 | |||
| 10b7b30807 | |||
| 7844f7ea9c | |||
| 79de0035fe | |||
| 64df399df4 | |||
| fdae57bf35 | |||
| 0f5dd1cf64 | |||
| 6f1bbe64eb | |||
| 46aaefeddb | |||
| 06bdc9182b | |||
| e9585264e5 | |||
| 8aba19870d | |||
| 54558460e7 | |||
| fc2dc9de21 | |||
| 05533a8fa8 | |||
| 0f35a5dccd | |||
| 0e13367518 | |||
| 8660d90f09 | |||
| f3e3b70934 | |||
| 371b33204c | |||
| 06e6e31fe1 | |||
| b50f61f025 | |||
| 5e69e1c7d6 | |||
| 754dc2e246 | |||
| 03763cb29f | |||
| ac041f8a77 | |||
| d91bf90adf | |||
| 57d576b3a5 | |||
| 4da102d869 | |||
| 96e2502b07 | |||
| cc35d66464 | |||
| 31c1ecba2e | |||
| 8d9e4ba8a8 | |||
| 36a5044d9e | |||
| 5eee66b6f7 | |||
| b0557aaa5e | |||
| 7acfaf23d2 | |||
| 0cfa984eef | |||
| d1069acf22 | |||
| 8455135367 | |||
| a49db53e51 | |||
| 8d7c818772 | |||
| 60973b86ab | |||
| 0022a83da2 | |||
| 15a0d2df76 | |||
| 1037a25dd8 | |||
| 8bad881549 | |||
| 335f7e8e95 | |||
| b363778ed1 | |||
| bda753f919 | |||
| 94da1d013d | |||
| 4b67dddc3d | |||
| b3ce9b994b | |||
| 3b4ebf7ad0 | |||
| b296edb629 | |||
| 8ec1c1cd75 | |||
| 7aeb0978ca | |||
| 28491dd4e5 | |||
| 6e795389a9 | |||
| 18825b326c | |||
| 271bc01497 | |||
| 92618ac659 | |||
| a0343bcd69 |
@@ -0,0 +1,38 @@
|
||||
From b8b4a6bcfe35ba9539a120cfd16573123ddd9241 Mon Sep 17 00:00:00 2001
|
||||
From: "Sv. Lockal" <lockalsash@gmail.com>
|
||||
Date: Mon, 15 Dec 2025 03:46:35 +0800
|
||||
Subject: [PATCH] Fix compilation with libdrm-2.4.130
|
||||
|
||||
Fix error: redefinition of 'struct drm_color_ctm_3x4'.
|
||||
|
||||
drm_color_ctm_3x4 structure is now defined in https://github.com/torvalds/linux/commit/e5719e7f19009d4fbedf685fc22eec9cd8de154f#diff-4c51fb416ec7cc69566cd7b795ee57eb070aa1006ad65d6962081f039ffb2718
|
||||
|
||||
As this structure is unused and not a part of amdsmi public interface,
|
||||
it is safe to remove it.
|
||||
---
|
||||
include/amd_smi/impl/amdgpu_drm.h | 9 ---------
|
||||
1 file changed, 9 deletions(-)
|
||||
|
||||
diff --git a/include/amd_smi/impl/amdgpu_drm.h b/include/amd_smi/impl/amdgpu_drm.h
|
||||
index b56a5ac4b20a..0e483d13b382 100644
|
||||
--- a/include/amd_smi/impl/amdgpu_drm.h
|
||||
+++ b/include/amd_smi/impl/amdgpu_drm.h
|
||||
@@ -1625,15 +1625,6 @@ struct drm_amdgpu_info_uq_metadata {
|
||||
#define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */
|
||||
#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */
|
||||
|
||||
-/* FIXME wrong namespace! */
|
||||
-struct drm_color_ctm_3x4 {
|
||||
- /*
|
||||
- * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude
|
||||
- * (not two's complement!) format.
|
||||
- */
|
||||
- __u64 matrix[12];
|
||||
-};
|
||||
-
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
--
|
||||
2.51.1
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
From 21afd2c2d58b8c7895df47f6a16a0781a7f0024a Mon Sep 17 00:00:00 2001
|
||||
From: Sakura286 <sakura286@outlook.com>
|
||||
Date: Sat, 6 Jun 2026 20:38:13 +0800
|
||||
Subject: [PATCH 1/2] Disable goamdsmi_shim when ESMI is off
|
||||
|
||||
The Go shim in goamdsmi_shim/smiwrapper/amdsmi_go_shim.c calls CPU-only
|
||||
APIs such as amdsmi_get_cpu_core_energy, amdsmi_get_threads_per_core
|
||||
and amdsmi_get_processor_handles_by_type. In include/amd_smi/amdsmi.h
|
||||
these are all guarded by `#ifdef ENABLE_ESMI_LIB`, so when ESMI is
|
||||
disabled (e.g. on riscv64 / non-x86 architectures) the declarations
|
||||
disappear and the shim fails to build with implicit-function-declaration
|
||||
errors.
|
||||
---
|
||||
CMakeLists.txt | 6 ++++--
|
||||
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 1b7375f..013c1ad 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -363,8 +363,10 @@ install(
|
||||
PATTERN "build*" EXCLUDE
|
||||
PATTERN ".cache*" EXCLUDE)
|
||||
|
||||
-# Make for goamdsmi_shim library
|
||||
-add_subdirectory(goamdsmi_shim)
|
||||
+# The Go shim uses CPU APIs gated by ENABLE_ESMI_LIB; only build it when ESMI is on
|
||||
+if(ENABLE_ESMI_LIB)
|
||||
+ add_subdirectory(goamdsmi_shim)
|
||||
+endif()
|
||||
|
||||
#Debian package specific variables
|
||||
set(CPACK_DEBIAN_PACKAGE_RECOMMENDS "python3-argcomplete, libdrm-dev, libdrm-amdgpu-dev")
|
||||
--
|
||||
2.53.0
|
||||
|
||||
@@ -0,0 +1,77 @@
|
||||
From 41740f15ede6e04e46ff736bcc85ca8fd1aae641 Mon Sep 17 00:00:00 2001
|
||||
From: Sakura286 <sakura286@outlook.com>
|
||||
Date: Sat, 6 Jun 2026 21:55:37 +0800
|
||||
Subject: [PATCH 2/2] Tolerate missing CPU/E-SMI symbols on non-x86_64
|
||||
|
||||
With ENABLE_ESMI_LIB=OFF (non-x86_64), libamd_smi.so omits the CPU API,
|
||||
but the ctypesgen wrapper binds every symbol at import time, so the
|
||||
missing CPU symbols make `import amdsmi` fail with AttributeError.
|
||||
|
||||
Wrap the loaded CDLL in a proxy so missing symbols resolve to a lazy
|
||||
stub that only raises when actually called. `import amdsmi` then works
|
||||
and GPU consumers (e.g. PyTorch with ROCM EP, which never calls the CPU
|
||||
API) are unaffected. The library object is wrapped instead of ctypes.CDLL
|
||||
itself so callers hooking ctypes.CDLL keep working.
|
||||
---
|
||||
py-interface/amdsmi_wrapper.py | 38 +++++++++++++++++++++++++++++++++-
|
||||
1 file changed, 37 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/py-interface/amdsmi_wrapper.py b/py-interface/amdsmi_wrapper.py
|
||||
index 99ed017..0626b72 100644
|
||||
--- a/py-interface/amdsmi_wrapper.py
|
||||
+++ b/py-interface/amdsmi_wrapper.py
|
||||
@@ -176,6 +176,42 @@ from pathlib import Path
|
||||
# 3. Relative to amdsmi_wrapper.py
|
||||
# - parent directory
|
||||
# - current directory
|
||||
+class _AmdsmiMissingSymbol:
|
||||
+ # Placeholder for a symbol absent from libamd_smi.so (e.g. the CPU/E-SMI
|
||||
+ # API when the library is built with ENABLE_ESMI_LIB=OFF on non-x86_64).
|
||||
+ # It accepts the restype/argtypes the wrapper assigns at import time and
|
||||
+ # only raises if the symbol is ever actually called.
|
||||
+ def __init__(self, name):
|
||||
+ object.__setattr__(self, "_amdsmi_name", name)
|
||||
+
|
||||
+ def __setattr__(self, key, value):
|
||||
+ object.__setattr__(self, key, value)
|
||||
+
|
||||
+ def __call__(self, *args, **kwargs):
|
||||
+ name = object.__getattribute__(self, "_amdsmi_name")
|
||||
+ raise NotImplementedError(
|
||||
+ "amdsmi symbol " + repr(name) + " is unavailable: it is not "
|
||||
+ "exported by this build of libamd_smi.so")
|
||||
+
|
||||
+
|
||||
+class _AmdsmiTolerantLib:
|
||||
+ # Proxy around the loaded CDLL so that binding a symbol the library does
|
||||
+ # not export resolves to a stub instead of raising AttributeError at
|
||||
+ # import time. We wrap the already-constructed library object rather than
|
||||
+ # the ctypes.CDLL() call so that callers hooking ctypes.CDLL (e.g.
|
||||
+ # PyTorch's libamd_smi.so loader) keep working.
|
||||
+ def __init__(self, lib):
|
||||
+ object.__setattr__(self, "_amdsmi_lib", lib)
|
||||
+
|
||||
+ def __getattr__(self, name):
|
||||
+ try:
|
||||
+ return getattr(object.__getattribute__(self, "_amdsmi_lib"), name)
|
||||
+ except AttributeError:
|
||||
+ if name.startswith("__"):
|
||||
+ raise
|
||||
+ return _AmdsmiMissingSymbol(name)
|
||||
+
|
||||
+
|
||||
def find_smi_library():
|
||||
err = OSError("Could not load libamd_smi.so")
|
||||
possible_locations = []
|
||||
@@ -194,7 +230,7 @@ def find_smi_library():
|
||||
for location in possible_locations:
|
||||
try:
|
||||
lib = ctypes.CDLL(location)
|
||||
- return lib, location
|
||||
+ return _AmdsmiTolerantLib(lib), location
|
||||
except OSError as e:
|
||||
err = e
|
||||
continue
|
||||
--
|
||||
2.53.0
|
||||
|
||||
@@ -0,0 +1,175 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%bcond test 0
|
||||
%if %{with test}
|
||||
%global build_test ON
|
||||
%else
|
||||
%global build_test OFF
|
||||
%endif
|
||||
|
||||
%global rocm_release 7.2
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
# esmi_ib_library is not suitable for packaging
|
||||
# https://github.com/amd/esmi_ib_library/issues/13
|
||||
# This tag was chosen by the amdsmi project because 4.0+ introduced variables
|
||||
# not found in the upstream kernel.
|
||||
%global esmi_ver 4.2
|
||||
%global pkg_library_version 26
|
||||
|
||||
Name: amdsmi
|
||||
Version: %{rocm_version}
|
||||
Release: %autorelease
|
||||
Summary: AMD System Management Interface
|
||||
License: MIT AND (GPL-2.0-only WITH Linux-syscall-note) AND NSCA
|
||||
# Main license is MIT
|
||||
#
|
||||
# These files are GPL-2.0:
|
||||
# include/amd_smi/impl/amd_hsmp.h
|
||||
# esmi_ib_library/include/asm/amd_hsmp.h
|
||||
# Both carry: SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
|
||||
#
|
||||
# NSCA covers the bundled esmi_ib_library
|
||||
Url: https://github.com/ROCm/rocm-systems
|
||||
#!RemoteAsset: sha256:23c31cd787d86ee35c82746fcde705eacc46517815110376f28417909ef46406
|
||||
Source0: %{url}/releases/download/rocm-%{version}/%{name}.tar.gz
|
||||
#!RemoteAsset: sha256:de19d222d09e2171f47f8bbd6608e5648bd547c82543379bb8fb5ed2e379e141
|
||||
Source1: https://github.com/amd/esmi_ib_library/archive/refs/tags/esmi_pkg_ver-%{esmi_ver}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
# Support libdrm 2.4.130+
|
||||
# https://github.com/ROCm/amdsmi/pull/165
|
||||
Patch0: 0001-Fix-compilation-with-libdrm-2.4.130.patch
|
||||
# -DENABLE_ESMI_LIB=OFF is not enough.
|
||||
# goamdsmi_shim references CPU/ESMI-only APIs; only build it when ESMI is on
|
||||
Patch1: 2001-Disable-goamdsmi_shim-when-ESMI-is-off.patch
|
||||
# Without ESMI (non-x86_64) libamd_smi.so omits the CPU API; let the ctypesgen
|
||||
# wrapper tolerate the missing symbols so `import amdsmi` still works
|
||||
Patch2: 2002-Tolerate-missing-CPU-E-SMI-symbols-on-non-x86_64.patch
|
||||
|
||||
BuildOption(conf): -G Ninja
|
||||
BuildOption(conf): -DBUILD_TESTS=%{build_test}
|
||||
BuildOption(conf): -DCMAKE_SKIP_INSTALL_RPATH=TRUE
|
||||
%ifnarch x86_64
|
||||
BuildOption(conf): -DENABLE_ESMI_LIB=OFF
|
||||
%endif
|
||||
|
||||
BuildRequires: cmake
|
||||
%if %{with test}
|
||||
BuildRequires: cmake(GTest)
|
||||
%endif
|
||||
BuildRequires: ninja
|
||||
BuildRequires: pkgconfig(libdrm)
|
||||
BuildRequires: pkgconfig(libdrm_amdgpu)
|
||||
BuildRequires: pkgconfig(python3)
|
||||
|
||||
Requires: python3dist(pyyaml)
|
||||
|
||||
%description
|
||||
The AMD System Management Interface Library, or AMD SMI library, is a C
|
||||
library for Linux that provides a user space interface for applications
|
||||
to monitor and control AMD devices.
|
||||
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
%{summary}
|
||||
|
||||
%if %{with test}
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%prep
|
||||
%autosetup -p1 -N -n %{name}
|
||||
%patch 0 -p1
|
||||
%patch 1 -p1
|
||||
%ifnarch x86_64
|
||||
%patch 2 -p1
|
||||
%endif
|
||||
|
||||
# ESMI - EPYC System Management Interface
|
||||
# esmi_ib_library uses x86-only cpuid.h; guard it for non-x86 builds
|
||||
%ifarch x86_64
|
||||
tar xf %{SOURCE1}
|
||||
mv esmi_ib_library-* esmi_ib_library
|
||||
mv esmi_ib_library/License.txt esmi_ib_library_License.txt
|
||||
# The esmi version check uses git tags, but we build from tarballs that contain no git metadata.
|
||||
# Just inject in the tag that we've pulled into the version check:
|
||||
sed -i 's/NOT latest_esmi_tag/NOT "esmi_pkg_ver-%{esmi_ver}"/' CMakeLists.txt
|
||||
%endif
|
||||
|
||||
# /usr/libexec/amdsmi_cli/BDF.py:126: SyntaxWarning: invalid escape sequence '\.'
|
||||
sed -i -e 's@bdf_regex = "@bdf_regex = r"@' amdsmi_cli/BDF.py
|
||||
|
||||
# Fix script shebang
|
||||
sed -i -e 's@env python3@python3@' amdsmi_cli/*.py
|
||||
|
||||
%install -a
|
||||
mkdir -p %{buildroot}%{python3_sitearch}
|
||||
mv %{buildroot}%{_datadir}/amdsmi %{buildroot}%{python3_sitearch}
|
||||
mv %{buildroot}%{_datadir}/pyproject.toml %{buildroot}%{python3_sitearch}/amdsmi/
|
||||
|
||||
# W: unstripped-binary-or-object .../amdsmi/libamd_smi.so
|
||||
# The library is opened explicitly at runtime, so it cannot simply be removed; strip it instead
|
||||
strip %{buildroot}%{python3_sitearch}/amdsmi/*.so
|
||||
# E: non-executable-script .../amdsmi_cli/amdsmi_cli_exceptions.py 644 /usr/bin/env python3
|
||||
chmod a+x %{buildroot}%{_libexecdir}/amdsmi_cli/amdsmi_*.py
|
||||
|
||||
rm -rf %{buildroot}%{_datadir}/example
|
||||
rm -rf %{buildroot}%{_datadir}/amd_smi/example
|
||||
rm -f %{buildroot}%{_datadir}/_version.py
|
||||
rm -f %{buildroot}%{_datadir}/amd_smi/_version.py
|
||||
rm -f %{buildroot}%{_datadir}/setup.py
|
||||
rm -f %{buildroot}%{_datadir}/amd_smi/setup.py
|
||||
rm -f %{buildroot}%{_docdir}/amd_smi-asan/LICENSE.txt
|
||||
rm -f %{buildroot}%{_docdir}/amd-smi-lib/LICENSE.txt
|
||||
rm -f %{buildroot}%{_docdir}/amd-smi-lib/README.md
|
||||
rm -rf %{buildroot}%{_docdir}/amd-smi-lib/copyright
|
||||
|
||||
if [ -e %{buildroot}%{_datadir}/amd_smi/tests ]; then
|
||||
mkdir -p %{buildroot}%{_datadir}/amdsmi
|
||||
mv %{buildroot}%{_datadir}/amd_smi/tests %{buildroot}%{_datadir}/amdsmi/
|
||||
fi
|
||||
|
||||
%files
|
||||
%doc README.md
|
||||
%license LICENSE
|
||||
%{_bindir}/amd-smi
|
||||
%{_libdir}/libamd_smi.so.%{pkg_library_version}{,.*}
|
||||
%{_libexecdir}/amdsmi_cli
|
||||
%{python3_sitearch}/amdsmi
|
||||
|
||||
%ifarch x86_64
|
||||
%license esmi_ib_library_License.txt
|
||||
%{_libdir}/libgoamdsmi_shim64.so.1{,.*}
|
||||
%endif
|
||||
|
||||
%files devel
|
||||
%{_includedir}/amd_smi/
|
||||
%{_libdir}/cmake/amd_smi/
|
||||
%{_libdir}/libamd_smi.so
|
||||
|
||||
%ifarch x86_64
|
||||
%{_includedir}/*.h
|
||||
%{_libdir}/libgoamdsmi_shim64.so
|
||||
%endif
|
||||
|
||||
%if %{with test}
|
||||
%files test
|
||||
%{_datadir}/amdsmi/
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
%autochangelog
|
||||
+18
-58
@@ -1,21 +1,21 @@
|
||||
# For testing
|
||||
# Depends on downloading and being in a git repo
|
||||
%bcond test 0
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
# Header only package
|
||||
%global debug_package %{nil}
|
||||
|
||||
Summary: Functional Programming Library for C++
|
||||
Name: fplus
|
||||
Version: 0.2.28
|
||||
Release: %autorelease
|
||||
Summary: Helps you write concise and readable C++ code
|
||||
Url: https://github.com/Dobiasd/FunctionalPlus
|
||||
License: BSL-1.0
|
||||
Version: 0.2.25
|
||||
Release: 1%{?dist}
|
||||
|
||||
URL: https://github.com/Dobiasd/FunctionalPlus
|
||||
Source0: %{url}/archive/v%{version}.tar.gz#/FunctionalPlus-%{version}.tar.gz
|
||||
#!RemoteAsset: sha256:8864a3e9bebde6ebed71b49ac2a036cedf9ae0f02ce758bc28c21e6a2ae15803
|
||||
Source0: %{url}/archive/v%{version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: gcc-c++
|
||||
|
||||
%description
|
||||
FunctionalPlus is a small header-only library supporting you in
|
||||
@@ -26,51 +26,11 @@ run. It pursues these goals by providing pure and easy-to-use
|
||||
functions that free you from implementing commonly used flows of
|
||||
control over and over again.
|
||||
|
||||
%package devel
|
||||
|
||||
Summary: Functional Programming Library for C++
|
||||
Provides: %{name}-static = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
FunctionalPlus is a small header-only library supporting you in
|
||||
reducing code noise and in dealing with only one single level
|
||||
of abstraction at a time. By increasing brevity and maintainability
|
||||
of your code it can improve productivity (and fun!) in the long
|
||||
run. It pursues these goals by providing pure and easy-to-use
|
||||
functions that free you from implementing commonly used flows of
|
||||
control over and over again.
|
||||
|
||||
%prep
|
||||
%autosetup -p1 -n FunctionalPlus-%{version}
|
||||
|
||||
# License check flags this as BSD 3-Clause
|
||||
# api_search not distributed, remove to make license simpler
|
||||
rm -rf api_search
|
||||
|
||||
%build
|
||||
%cmake
|
||||
%cmake_build
|
||||
|
||||
%if %{with test}
|
||||
%check
|
||||
%ctest
|
||||
%endif
|
||||
|
||||
%install
|
||||
%cmake_install
|
||||
|
||||
%files devel
|
||||
%dir %_includedir/%{name}
|
||||
%dir %_includedir/%{name}/internal
|
||||
%dir %_includedir/%{name}/internal/asserts
|
||||
%license LICENSE
|
||||
%files
|
||||
%doc README.md
|
||||
%_includedir/%{name}/*_defines
|
||||
%_includedir/%{name}/*.hpp
|
||||
%_includedir/%{name}/internal/*.hpp
|
||||
%_includedir/%{name}/internal/asserts/*.hpp
|
||||
%_libdir/cmake/FunctionalPlus/
|
||||
%license LICENSE
|
||||
%{_includedir}/fplus/
|
||||
%{_libdir}/cmake/FunctionalPlus/
|
||||
|
||||
%changelog
|
||||
* Mon Feb 2 2026 Yifan Xu <xuyifan@iscas.ac.cn> - 0.2.25-1
|
||||
- Import from upstream
|
||||
%autochangelog
|
||||
|
||||
@@ -1,117 +0,0 @@
|
||||
%bcond test 0
|
||||
|
||||
# Header only package
|
||||
%global debug_package %{nil}
|
||||
|
||||
Summary: Header-only library for using Keras (TensorFlow) models in C++
|
||||
Name: frugally-deep
|
||||
License: MIT
|
||||
# Main license is MIT
|
||||
# BSD-2-Clause is only for cmake/HunterGate.cmake and that is not distributed
|
||||
Version: 0.15.30
|
||||
Release: 1%{?dist}
|
||||
|
||||
URL: https://github.com/Dobiasd/frugally-deep
|
||||
Source0: %{url}/archive/v%{version}.tar.gz#/%{name}-%{version}.tar.gz
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: eigen3
|
||||
BuildRequires: fplus-devel
|
||||
#BuildRequires: nlohmann_json-devel
|
||||
BuildRequires: nlohmann-json
|
||||
BuildRequires: gcc-c++
|
||||
|
||||
%description
|
||||
Would you like to build/train a model using Keras/Python? And would
|
||||
you like to run the prediction (forward pass) on your model in C++
|
||||
without linking your application against TensorFlow? Then
|
||||
frugally-deep is exactly for you.
|
||||
|
||||
frugally-deep
|
||||
|
||||
* is a small header-only library written in modern and pure C++.
|
||||
* is very easy to integrate and use.
|
||||
* depends only on FunctionalPlus, Eigen and json - also header-only
|
||||
libraries.
|
||||
* supports inference (model.predict) not only for sequential models
|
||||
but also for computational graphs with a more complex topology,
|
||||
created with the functional API.
|
||||
* re-implements a (small) subset of TensorFlow, i.e., the operations
|
||||
needed to support prediction.
|
||||
* results in a much smaller binary size than linking against TensorFlow.
|
||||
* works out-of-the-box also when compiled into a 32-bit executable.
|
||||
(Of course, 64 bit is fine too.)
|
||||
* avoids temporarily allocating (potentially large chunks of)
|
||||
additional RAM during convolutions (by not materializing the im2col
|
||||
input matrix).
|
||||
* utterly ignores even the most powerful GPU in your system and uses
|
||||
only one CPU core per prediction. ;-)
|
||||
* but is quite fast on one CPU core, and you can run multiple
|
||||
predictions in parallel, thus utilizing as many CPUs as you like
|
||||
to improve the overall prediction throughput of your
|
||||
application/pipeline.
|
||||
|
||||
%package devel
|
||||
|
||||
Summary: Header-only library for using Keras (TensorFlow) models in C++
|
||||
Provides: %{name}-static = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
Would you like to build/train a model using Keras/Python? And would
|
||||
you like to run the prediction (forward pass) on your model in C++
|
||||
without linking your application against TensorFlow? Then
|
||||
frugally-deep is exactly for you.
|
||||
|
||||
frugally-deep
|
||||
|
||||
* is a small header-only library written in modern and pure C++.
|
||||
* is very easy to integrate and use.
|
||||
* depends only on FunctionalPlus, Eigen and json - also header-only
|
||||
libraries.
|
||||
* supports inference (model.predict) not only for sequential models
|
||||
but also for computational graphs with a more complex topology,
|
||||
created with the functional API.
|
||||
* re-implements a (small) subset of TensorFlow, i.e., the operations
|
||||
needed to support prediction.
|
||||
* results in a much smaller binary size than linking against TensorFlow.
|
||||
* works out-of-the-box also when compiled into a 32-bit executable.
|
||||
(Of course, 64 bit is fine too.)
|
||||
* avoids temporarily allocating (potentially large chunks of)
|
||||
additional RAM during convolutions (by not materializing the im2col
|
||||
input matrix).
|
||||
* utterly ignores even the most powerful GPU in your system and uses
|
||||
only one CPU core per prediction. ;-)
|
||||
* but is quite fast on one CPU core, and you can run multiple
|
||||
predictions in parallel, thus utilizing as many CPUs as you like
|
||||
to improve the overall prediction throughput of your
|
||||
application/pipeline.
|
||||
|
||||
%prep
|
||||
%autosetup -p1 -n %{name}-%{version}
|
||||
|
||||
# cmake changed
|
||||
sed -i -e 's@cmake_minimum_required(VERSION 3.2)@cmake_minimum_required(VERSION 3.5)@' CMakeLists.txt
|
||||
|
||||
%build
|
||||
%cmake
|
||||
%cmake_build
|
||||
|
||||
%if %{with test}
|
||||
%check
|
||||
%ctest
|
||||
%endif
|
||||
|
||||
%install
|
||||
%cmake_install
|
||||
|
||||
%files devel
|
||||
%dir %_includedir/fdeep
|
||||
%dir %_libdir/cmake/%{name}
|
||||
%license LICENSE
|
||||
%doc README.md
|
||||
%_includedir/fdeep/*
|
||||
%_libdir/cmake/%{name}/*
|
||||
|
||||
%changelog
|
||||
* Mon Feb 2 2026 Yifan Xu <xuyifan@iscas.ac.cn> - 0.15.30-1
|
||||
- Import from upstream
|
||||
+30
-54
@@ -1,64 +1,40 @@
|
||||
# there is no debug package
|
||||
%global debug_package %{nil}
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
Name: half
|
||||
Version: 2.2.0
|
||||
Release: 1%{?dist}
|
||||
Summary: A C++ half-precision floating point type
|
||||
Version: %{rocm_version}
|
||||
Release: %autorelease
|
||||
Summary: ROCm half-precision floating point library
|
||||
License: MIT
|
||||
Url: https://github.com/ROCm/half
|
||||
#!RemoteAsset: sha256:1b5de9e50513560265a79022fd74322b77216f9bf938be688709a8e7d1d8d09d
|
||||
Source0: %{url}/archive/rocm-%{version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
URL: http://sourceforge.net/projects/half
|
||||
Source0: %{url}/files/%{name}/%{version}/%{name}-%{version}.zip
|
||||
BuildArch: noarch
|
||||
|
||||
BuildRequires: unzip
|
||||
BuildRequires: cmake
|
||||
BuildRequires: rocm-cmake
|
||||
|
||||
%description
|
||||
This is a C++ header-only library to provide an IEEE-754 conformant
|
||||
half-precision floating point type along with corresponding arithmetic
|
||||
operators, type conversions and common mathematical functions. It aims
|
||||
for both efficiency and ease of use, trying to accurately mimic the
|
||||
behaviour of the builtin floating point types at the best performance
|
||||
possible. It automatically uses and provides C++11 features when
|
||||
possible, but stays completely C++98-compatible when necessary.
|
||||
half is a C++ header-only library providing an IEEE-754 conformant
|
||||
half-precision floating point type along with arithmetic operators,
|
||||
type conversions, and common mathematical functions. It is part of
|
||||
the ROCm software stack.
|
||||
|
||||
%package devel
|
||||
Summary: A C++ half-precision floating point type
|
||||
Provides: %{name}-static = %{version}-%{release}
|
||||
%install -a
|
||||
rm -f %{buildroot}%{_datadir}/doc/half/LICENSE.txt
|
||||
|
||||
%description devel
|
||||
This is a C++ header-only library to provide an IEEE-754 conformant
|
||||
half-precision floating point type along with corresponding arithmetic
|
||||
operators, type conversions and common mathematical functions. It aims
|
||||
for both efficiency and ease of use, trying to accurately mimic the
|
||||
behaviour of the builtin floating point types at the best performance
|
||||
possible. It automatically uses and provides C++11 features when
|
||||
possible, but stays completely C++98-compatible when necessary.
|
||||
|
||||
%prep
|
||||
rm -rf %{name}-%{version}
|
||||
unzip -d %{name}-%{version} %{SOURCE0}
|
||||
cd %{name}-%{version}
|
||||
# change dos endings to unix
|
||||
sed -i "s|\r||g" include/half.hpp
|
||||
sed -i "s|\r||g" LICENSE.txt
|
||||
sed -i "s|\r||g" README.txt
|
||||
|
||||
%install
|
||||
cd %{name}-%{version}
|
||||
mkdir -p %{buildroot}%{_includedir}
|
||||
install -m 644 include/half.hpp %{buildroot}%{_includedir}
|
||||
|
||||
mkdir -p %{buildroot}%{_docdir}/%{name}/
|
||||
install -m 644 LICENSE.txt %{buildroot}%{_docdir}/%{name}/
|
||||
install -m 644 README.txt %{buildroot}%{_docdir}/%{name}/
|
||||
|
||||
%files devel
|
||||
%dir %{_docdir}/%{name}
|
||||
%doc %{_docdir}/%{name}/README.txt
|
||||
%license %{_docdir}/%{name}/LICENSE.txt
|
||||
%{_includedir}/half.hpp
|
||||
%files
|
||||
%license LICENSE.txt
|
||||
%doc README.txt
|
||||
%{_includedir}/half/
|
||||
|
||||
%changelog
|
||||
* Mon Feb 2 2026 Yifan Xu <xuyifan@iscas.ac.cn> - 2.2.0-1
|
||||
- Import from upstream
|
||||
%autochangelog
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%global rocm_version 7.1.1
|
||||
|
||||
Name: hipblas-common
|
||||
Version: %{rocm_version}
|
||||
Release: %autorelease
|
||||
Summary: Common files shared by hipBLAS and hipBLASLt
|
||||
License: MIT
|
||||
Url: https://github.com/ROCm/hipBLAS-common
|
||||
#!RemoteAsset
|
||||
Source0: %{url}/archive/rocm-%{rocm_version}.tar.gz
|
||||
BuildArch: noarch
|
||||
BuildSystem: cmake
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: rocm-cmake
|
||||
|
||||
%description
|
||||
%summary
|
||||
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
Provides: %{name}-static = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
%{summary}
|
||||
|
||||
%install -a
|
||||
rm -f %{buildroot}%{_prefix}/share/doc/hipblas-common/LICENSE.md
|
||||
|
||||
%files devel
|
||||
%license LICENSE.md
|
||||
%{_includedir}/%{name}
|
||||
%{_libdir}/cmake/%{name}
|
||||
|
||||
%changelog
|
||||
%{?autochangelog}
|
||||
@@ -1,91 +0,0 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%global rocm_version 7.1.1
|
||||
|
||||
%bcond test 0
|
||||
|
||||
Name: hipblas
|
||||
Version: %{rocm_version}
|
||||
Release: %autorelease
|
||||
Summary: ROCm BLAS marshalling library
|
||||
License: MIT
|
||||
Url: https://github.com/ROCm/hipBLAS
|
||||
#!RemoteAsset
|
||||
Source0: %{url}/archive/rocm-%{rocm_version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: cmake
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hipblas-common)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(rocblas)
|
||||
BuildRequires: cmake(rocsolver)
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: gcc-fortran
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-llvm-macros
|
||||
|
||||
Provides: hipblas = %{version}-%{release}
|
||||
|
||||
%description
|
||||
hipBLAS is a Basic Linear Algebra Subprograms (BLAS) marshalling
|
||||
library, with multiple supported backends. It sits between the
|
||||
application and a 'worker' BLAS library, marshalling inputs into
|
||||
the backend library and marshalling results back to the
|
||||
application. hipBLAS exports an interface that does not require
|
||||
the client to change, regardless of the chosen backend. Currently,
|
||||
hipBLAS supports rocBLAS and cuBLAS as backends.
|
||||
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
Requires: cmake(hipblas-common)
|
||||
|
||||
%description devel
|
||||
%{summary}
|
||||
|
||||
%if %{with test}
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%prep -a
|
||||
# This is a tarball, no .git to query
|
||||
sed -i -e 's@find_package(Git REQUIRED)@#find_package(Git REQUIRED)@' library/CMakeLists.txt
|
||||
|
||||
%install -a
|
||||
# Remove the LICENSE.md copy left under the doc directory; the license is packaged through %license in %files.
|
||||
# This must run after the install step: the buildroot has no installed files during %build.
|
||||
rm -f %{buildroot}%{_prefix}/share/doc/hipblas/LICENSE.md
|
||||
|
||||
%files
|
||||
%license LICENSE.md
|
||||
%doc README.md
|
||||
%{_libdir}/libhipblas.so.3{,.*}
|
||||
|
||||
%files devel
|
||||
%{_includedir}/hipblas/
|
||||
%{_libdir}/libhipblas.so
|
||||
%{_libdir}/cmake/hipblas/
|
||||
|
||||
%if %{with test}
|
||||
%files test
|
||||
%{_bindir}/hipblas*
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
%{?autochangelog}
|
||||
@@ -0,0 +1,25 @@
|
||||
From 43c4a61c5d8836a16feb8e53c72f255790523ff3 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Mon, 3 Nov 2025 06:11:40 -0800
|
||||
Subject: [PATCH] hipblaslt find origami package
|
||||
|
||||
---
|
||||
CMakeLists.txt | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index dbccca92c84f..d02df50540c2 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -218,7 +218,7 @@ if(HIPBLASLT_ENABLE_MSGPACK)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
-add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../../shared/origami" origami)
|
||||
+find_package(origami CONFIG REQUIRED)
|
||||
add_subdirectory(tensilelite)
|
||||
|
||||
if(HIPBLASLT_ENABLE_HOST)
|
||||
--
|
||||
2.52.0
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
From 72521fcca77c010b9c8b9ce91cde925164502d6f Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Thu, 25 Sep 2025 13:02:55 -0700
|
||||
Subject: [PATCH] hipblaslt tensilelite remove yappi dependency
|
||||
|
||||
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
|
||||
---
|
||||
tensilelite/Tensile/TensileCreateLibrary/Run.py | 15 ---------------
|
||||
tensilelite/requirements.txt | 2 +-
|
||||
2 files changed, 1 insertion(+), 16 deletions(-)
|
||||
|
||||
diff --git a/tensilelite/Tensile/TensileCreateLibrary/Run.py b/tensilelite/Tensile/TensileCreateLibrary/Run.py
|
||||
index 835ed9c01916..a02705f6554a 100644
|
||||
--- a/tensilelite/Tensile/TensileCreateLibrary/Run.py
|
||||
+++ b/tensilelite/Tensile/TensileCreateLibrary/Run.py
|
||||
@@ -231,12 +231,6 @@ def writeSolutionsAndKernels(
|
||||
generateSourcesAndExit=False,
|
||||
compress=True,
|
||||
):
|
||||
- if globalParameters["PythonProfile"]:
|
||||
- globalParameters["CpuThreads"] = 0
|
||||
- printWarning("Python profiling is enabled. CpuThreads set to 0.")
|
||||
- import yappi
|
||||
- yappi.start()
|
||||
-
|
||||
codeObjectFiles = []
|
||||
|
||||
outputPath = Path(outputPath)
|
||||
@@ -299,15 +293,6 @@ def writeSolutionsAndKernels(
|
||||
writeHelpers(outputPath, kernelHelperObjs, KERNEL_HELPER_FILENAME_CPP, KERNEL_HELPER_FILENAME_H)
|
||||
srcKernelFile = Path(outputPath) / "Kernels.cpp"
|
||||
|
||||
- if globalParameters["PythonProfile"]:
|
||||
- yappi.stop()
|
||||
- yappi.get_func_stats().save("yappi_results.profile", type="callgrind")
|
||||
- with open("yappi_results.txt", "w") as f:
|
||||
- yappi.get_func_stats().print_all(out=f)
|
||||
- if globalParameters["CpuThreads"] != 0:
|
||||
- with open("yappi_thread_stats.txt", "w") as f:
|
||||
- yappi.get_thread_stats().print_all(out=f)
|
||||
-
|
||||
if not generateSourcesAndExit:
|
||||
codeObjectFiles += buildAssemblyCodeObjectFiles(
|
||||
asmToolchain.linker,
|
||||
diff --git a/tensilelite/requirements.txt b/tensilelite/requirements.txt
|
||||
index 60c4c1144537..e87db8445411 100644
|
||||
--- a/tensilelite/requirements.txt
|
||||
+++ b/tensilelite/requirements.txt
|
||||
@@ -7,4 +7,4 @@ joblib>=1.1.1; python_version < '3.8'
|
||||
simplejson
|
||||
ujson
|
||||
orjson
|
||||
-yappi
|
||||
+
|
||||
--
|
||||
2.52.0
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
From 1ac117ac0591a0f1bb67c34f537354c21412b2d8 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Sat, 1 Nov 2025 09:43:58 -0700
|
||||
Subject: [PATCH] hipblaslt tensilelite use fedora paths
|
||||
|
||||
---
|
||||
tensilelite/Tensile/Common/GlobalParameters.py | 2 +-
|
||||
tensilelite/Tensile/Toolchain/Validators.py | 4 ++--
|
||||
2 files changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/tensilelite/Tensile/Common/GlobalParameters.py b/tensilelite/Tensile/Common/GlobalParameters.py
|
||||
index 567188da59bd..1f8037c183a6 100644
|
||||
--- a/tensilelite/Tensile/Common/GlobalParameters.py
|
||||
+++ b/tensilelite/Tensile/Common/GlobalParameters.py
|
||||
@@ -538,7 +538,7 @@ def assignGlobalParameters(config, isaInfoMap: Dict[IsaVersion, IsaInfo]):
|
||||
else:
|
||||
print2(" %24s: %8s (unspecified)" % (key, defaultValue))
|
||||
|
||||
- globalParameters["ROCmPath"] = "/opt/rocm"
|
||||
+ globalParameters["ROCmPath"] = "/usr"
|
||||
if "ROCM_PATH" in os.environ:
|
||||
globalParameters["ROCmPath"] = os.environ.get("ROCM_PATH")
|
||||
if "TENSILE_ROCM_PATH" in os.environ:
|
||||
diff --git a/tensilelite/Tensile/Toolchain/Validators.py b/tensilelite/Tensile/Toolchain/Validators.py
|
||||
index fd5dab5324c0..3ce024d31f52 100644
|
||||
--- a/tensilelite/Tensile/Toolchain/Validators.py
|
||||
+++ b/tensilelite/Tensile/Toolchain/Validators.py
|
||||
@@ -30,8 +30,8 @@ from typing import List, NamedTuple, Union
|
||||
|
||||
from Tensile.Common.Utilities import isRhel8
|
||||
|
||||
-DEFAULT_ROCM_BIN_PATH_POSIX = Path("/opt/rocm/bin")
|
||||
-DEFAULT_ROCM_LLVM_BIN_PATH_POSIX = Path("/opt/rocm/lib/llvm/bin")
|
||||
+DEFAULT_ROCM_BIN_PATH_POSIX = Path("/usr/bin")
|
||||
+DEFAULT_ROCM_LLVM_BIN_PATH_POSIX = Path("/usr/lib64/rocm/llvm/bin")
|
||||
DEFAULT_ROCM_BIN_PATH_WINDOWS = Path("C:/Program Files/AMD/ROCm")
|
||||
|
||||
|
||||
--
|
||||
2.52.0
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
From 9aa4664e02e27b50083be08e5b495cbef02d6f08 Mon Sep 17 00:00:00 2001
|
||||
From: Sakura286 <sakura286@outlook.com>
|
||||
Date: Mon, 8 Jun 2026 09:00:08 +0800
|
||||
Subject: [PATCH] hipblaslt tensilelite use system nanobind
|
||||
|
||||
---
|
||||
tensilelite/rocisa/CMakeLists.txt | 8 +-------
|
||||
1 file changed, 1 insertion(+), 7 deletions(-)
|
||||
|
||||
diff --git a/tensilelite/rocisa/CMakeLists.txt b/tensilelite/rocisa/CMakeLists.txt
|
||||
index 3918f18..9c6fcd3 100644
|
||||
--- a/tensilelite/rocisa/CMakeLists.txt
|
||||
+++ b/tensilelite/rocisa/CMakeLists.txt
|
||||
@@ -17,13 +17,7 @@ target_include_directories(rocisa-cpp
|
||||
)
|
||||
|
||||
if(HIPBLASLT_BUNDLE_PYTHON_DEPS)
|
||||
- include(FetchContent)
|
||||
- FetchContent_Declare(
|
||||
- nanobind
|
||||
- GIT_REPOSITORY https://github.com/wjakob/nanobind.git
|
||||
- GIT_TAG 9b3afa9dbdc23641daf26fadef7743e7127ff92f # v2.6.1
|
||||
- )
|
||||
- FetchContent_MakeAvailable(nanobind)
|
||||
+ find_package(nanobind CONFIG REQUIRED)
|
||||
|
||||
set(ROCISAINST_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/rocisa/src/instruction/instruction.cpp"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/rocisa/src/instruction/common.cpp"
|
||||
--
|
||||
2.53.0
|
||||
|
||||
@@ -0,0 +1,197 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
%global toolchain clang
|
||||
|
||||
%bcond build_test 0
|
||||
%if %{with build_test}
|
||||
%global cmake_test ON
|
||||
%else
|
||||
%global cmake_test OFF
|
||||
%endif
|
||||
|
||||
%global tensile_version 4.33.0
|
||||
# The upstream hipBLASLt project has a hard fork of the python-tensile package
|
||||
# that rocBLAS uses. The two versions are incompatible. It appears that the
|
||||
# fork happened around version 4.33.0. Unfortunately hipBLASLt can no longer be
|
||||
# built without using this fork.
|
||||
# https://github.com/ROCm/hipBLASLt/issues/535
|
||||
# The problem with the fork has been raised here.
|
||||
# https://github.com/ROCm/hipBLASLt/issues/908
|
||||
|
||||
%global tensile_verbose 1
|
||||
|
||||
Name: hipblaslt
|
||||
Version: %{rocm_version}
|
||||
Release: %autorelease
|
||||
Summary: ROCm general matrix operations beyond BLAS
|
||||
License: MIT AND BSD-3-Clause
|
||||
URL: https://github.com/ROCm/rocm-libraries
|
||||
#!RemoteAsset: sha256:05d73038b1b4f66f3df4eb595b7cb0c8935f7aa18d0e07dbe5cc740a4b691898
|
||||
Source0: %{url}/releases/download/rocm-%{version}/%{name}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
BuildOption(conf): -DBUILD_CLIENTS_TESTS=%{cmake_test}
|
||||
BuildOption(conf): -DCMAKE_VERBOSE_MAKEFILE=ON
|
||||
BuildOption(conf): -DGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
BuildOption(conf): -DHIPBLASLT_ENABLE_CLIENT=%{cmake_test}
|
||||
BuildOption(conf): -DHIPBLASLT_ENABLE_MARKER=OFF
|
||||
BuildOption(conf): -DHIPBLASLT_ENABLE_OPENMP=OFF
|
||||
BuildOption(conf): -DHIPBLASLT_ENABLE_ROCROLLER=OFF
|
||||
BuildOption(conf): -DHIPBLASLT_ENABLE_SAMPLES=OFF
|
||||
BuildOption(conf): -DTensile_LIBRARY_FORMAT=msgpack
|
||||
BuildOption(conf): -DTensile_VERBOSE=%{tensile_verbose}
|
||||
BuildOption(conf): -DVIRTUALENV_BIN_DIR=%{_bindir}
|
||||
BuildOption(conf): -Dnanobind_ROOT=%(python3 -m nanobind --cmake_dir)
|
||||
BuildOption(conf): -G Ninja
|
||||
|
||||
# yappi is used in tensilelite to generate profiling data, we are not using that in the build
|
||||
Patch0: 0001-hipblaslt-tensilelite-remove-yappi-dependency.patch
|
||||
# Patch from Fedora, change hard coded vendor paths
|
||||
Patch1: 0001-hipblaslt-tensilelite-use-system-paths.patch
|
||||
# https://github.com/ROCm/rocm-libraries/issues/2422
|
||||
Patch2: 0001-hipblaslt-find-origami-package.patch
|
||||
# use the distribution-provided nanobind instead of fetching/bundling it
|
||||
Patch3: 2001-hipblaslt-tensilelite-use-system-nanobind.patch
|
||||
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: cmake
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hipblas)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(msgpack)
|
||||
BuildRequires: cmake(origami)
|
||||
BuildRequires: cmake(rocblas)
|
||||
BuildRequires: cmake(rocm_smi)
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: gcc-fortran
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: ninja
|
||||
BuildRequires: pkgconfig(libzstd)
|
||||
BuildRequires: pkgconfig(python3)
|
||||
BuildRequires: pkgconfig(zlib)
|
||||
# https://github.com/ROCm/hipBLASLt/issues/1734
|
||||
BuildRequires: python3dist(msgpack)
|
||||
# nanobind is used to build the rocisa native module (build-time only)
|
||||
BuildRequires: python3dist(nanobind)
|
||||
BuildRequires: python3dist(setuptools)
|
||||
BuildRequires: python3dist(pyyaml)
|
||||
BuildRequires: python3dist(joblib)
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-device-libs
|
||||
BuildRequires: rocm-llvm-macros
|
||||
BuildRequires: rocminfo
|
||||
|
||||
%if %{with build_test}
|
||||
BuildRequires: cmake(openblas)
|
||||
BuildRequires: cmake(GMock)
|
||||
BuildRequires: cmake(GTest)
|
||||
%endif
|
||||
|
||||
%description
|
||||
hipBLASLt is a library that provides general matrix-matrix
|
||||
operations. It has a flexible API that extends functionalities
|
||||
beyond a traditional BLAS library, such as adding flexibility
|
||||
to matrix data layouts, input types, compute types, and
|
||||
algorithmic implementations and heuristics.
|
||||
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
%{summary}
|
||||
|
||||
%if %{with build_test}
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%prep -a
|
||||
# Use PATH to find where TensileGetPath and other tensile bins are
|
||||
sed -i -e 's@${Tensile_PREFIX}/bin/TensileGetPath@TensileGetPath@g' tensilelite/Tensile/cmake/TensileConfig.cmake
|
||||
|
||||
# defer to cmdline
|
||||
sed -i -e 's@set(CMAKE_INSTALL_LIBDIR@#set(CMAKE_INSTALL_LIBDIR@' CMakeLists.txt
|
||||
|
||||
# Do not use virtualenv_install
|
||||
sed -i -e 's@virtualenv_install@#virtualenv_install@' CMakeLists.txt
|
||||
|
||||
# Disable trying to download rocm-cmake
|
||||
sed -i -e 's@if(NOT ROCmCMakeBuildTools_FOUND)@if(FALSE)@' cmake/dependencies.cmake
|
||||
|
||||
# HIPBLASLT_ENABLE_OPENMP is OFF yet it is still being used
|
||||
# https://github.com/ROCm/rocm-libraries/issues/3201
|
||||
sed -i -e '/OpenMP::OpenMP_CXX/d' clients/CMakeLists.txt
|
||||
sed -i -e '/omp/d' clients/common/src/blis_interface.cpp
|
||||
sed -i -e '/#include <omp.h>/d' clients/common/include/testing_matmul.hpp
|
||||
sed -i -e '/#include <omp.h>/d' clients/common/include/hipblaslt_init.hpp
|
||||
sed -i -e '/#include <omp.h>/d' clients/common/src/cblas_interface.cpp
|
||||
|
||||
# We are building from a tarball, not a git repo
|
||||
sed -i -e 's@find_package(Git REQUIRED)@#find_package(Git REQUIRED)@' cmake/dependencies.cmake
|
||||
|
||||
# Forcefully replace all mentions of 'amdclang' with 'clang' in the Tensile Python files
|
||||
find tensilelite -type f -name "*.py" -exec sed -i 's/amdclang++/clang++/g; s/amdclang/clang/g' {} +
|
||||
|
||||
%build -p
|
||||
# Do a manual install instead of cmake's virtualenv
|
||||
cd tensilelite
|
||||
TL=$PWD
|
||||
|
||||
python3 setup.py install --root $TL
|
||||
cd ..
|
||||
|
||||
# Should not have to do this
|
||||
CLANG_PATH=`hipconfig --hipclangpath`
|
||||
ROCM_CLANG=${CLANG_PATH}/clang
|
||||
RESOURCE_DIR=`${ROCM_CLANG} -print-resource-dir`
|
||||
export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode
|
||||
export TENSILE_ROCM_ASSEMBLER_PATH=${CLANG_PATH}/clang++
|
||||
export TENSILE_ROCM_OFFLOAD_BUNDLER_PATH=${CLANG_PATH}/clang-offload-bundler
|
||||
|
||||
# Look for the just built tensilelite
|
||||
export PATH=${TL}/%{_bindir}:$PATH
|
||||
export PYTHONPATH=${TL}%{python3_sitelib}:$PYTHONPATH
|
||||
export Tensile_DIR=${TL}%{python3_sitelib}/Tensile
|
||||
|
||||
%install -a
|
||||
rm -f %{buildroot}%{_datadir}/doc/hipblaslt/LICENSE.md
|
||||
|
||||
%files
|
||||
%doc README.md
|
||||
%license LICENSE.md
|
||||
%{_libdir}/libhipblaslt.so.*
|
||||
%{_libdir}/hipblaslt/
|
||||
|
||||
%files devel
|
||||
%{_includedir}/hipblaslt/
|
||||
%{_includedir}/hipblaslt-export.h
|
||||
%{_includedir}/hipblaslt-version.h
|
||||
%{_libdir}/cmake/hipblaslt/
|
||||
%{_libdir}/libhipblaslt.so
|
||||
|
||||
%if %{with build_test}
|
||||
%files test
|
||||
%{_bindir}/hipblaslt*
|
||||
%{_bindir}/sequence.yaml
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
%autochangelog
|
||||
+52
-172
@@ -1,204 +1,84 @@
|
||||
%bcond gitcommit 0
|
||||
%if %{with gitcommit}
|
||||
%global commit0 2584e35062ad9c2edb68d93c464cf157bc57e3b0
|
||||
%global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
|
||||
%global date0 20250926
|
||||
%endif
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%global upstreamname hipcub
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
%bcond compat 0
|
||||
%if %{with compat}
|
||||
%global pkg_libdir lib
|
||||
%global pkg_prefix %{_prefix}/lib64/rocm/rocm-%{rocm_release}
|
||||
%global pkg_suffix -%{rocm_release}
|
||||
%global pkg_module rocm%{pkg_suffix}
|
||||
%else
|
||||
%global pkg_libdir %{_lib}
|
||||
%global pkg_prefix %{_prefix}
|
||||
%global pkg_suffix %{nil}
|
||||
%global pkg_module default
|
||||
%endif
|
||||
|
||||
# Compiler is hipcc, which is clang based:
|
||||
%global toolchain rocm
|
||||
# hipcc does not support some clang flags
|
||||
%global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host -fcf-protection/' -e 's/-mtls-dialect=gnu2//')
|
||||
# there is no debug package
|
||||
%global debug_package %{nil}
|
||||
|
||||
# build test subpackage
|
||||
%bcond test 0
|
||||
|
||||
# Option to test suite for testing on real HW:
|
||||
%bcond check 0
|
||||
|
||||
%if %{with check} || %{with test}
|
||||
%if %{with test}
|
||||
%global build_test ON
|
||||
%else
|
||||
%global build_test OFF
|
||||
%endif
|
||||
|
||||
# Compression type and level for source/binary package payloads.
|
||||
# "w7T0.xzdio" xz level 7 using %%{getncpus} threads
|
||||
%global _source_payload w7T0.xzdio
|
||||
%global _binary_payload w7T0.xzdio
|
||||
%global toolchain clang
|
||||
|
||||
%global gpu_list %{rocm_gpu_list_default}
|
||||
%global _gpu_list gfx1100
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
Name: hipcub%{pkg_suffix}
|
||||
%if %{with gitcommit}
|
||||
Version: git%{date0}.%{shortcommit0}
|
||||
Release: 2%{?dist}
|
||||
%else
|
||||
Name: hipcub
|
||||
Version: %{rocm_version}
|
||||
Release: 1%{?dist}
|
||||
%endif
|
||||
Summary: ROCm port of CUDA CUB library
|
||||
Release: %autorelease
|
||||
Summary: ROCm port of CUDA CUB (header-only)
|
||||
License: BSD-3-Clause AND MIT
|
||||
Url: https://github.com/ROCm/rocm-libraries
|
||||
#!RemoteAsset: sha256:6dadbb7689c7906493ec42f56792d9557f0293670a86059c9c188851f399647b
|
||||
Source: %{url}/releases/download/rocm-%{version}/hipcub.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
License: MIT and BSD-3-Clause
|
||||
URL: https://github.com/ROCm/rocm-libraries
|
||||
BuildOption(conf): -G Ninja
|
||||
BuildOption(conf): -DGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
BuildOption(conf): -DBUILD_TEST=%{build_test}
|
||||
|
||||
%if %{with gitcommit}
|
||||
Source0: %{url}/archive/%{commit0}/rocm-libraries-%{shortcommit0}.tar.gz
|
||||
%else
|
||||
Source0: %{url}/releases/download/rocm-%{version}/%{upstreamname}.tar.gz#/%{upstreamname}-%{version}.tar.gz
|
||||
%endif
|
||||
|
||||
BuildRequires: llvm
|
||||
BuildRequires: llvm-devel
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-devel
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: clang-tools-extra-devel
|
||||
BuildRequires: lld
|
||||
BuildRequires: lld-devel
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: rocm-device-libs
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: rocm-cmake%{pkg_suffix}
|
||||
BuildRequires: rocm-comgr%{pkg_suffix}-devel
|
||||
BuildRequires: rocm-llvm%{pkg_suffix}-macros
|
||||
BuildRequires: rocm-hip%{pkg_suffix}-devel
|
||||
BuildRequires: rocprim%{pkg_suffix}-static
|
||||
BuildRequires: rocr-runtime%{pkg_suffix}-devel
|
||||
#BuildRequires: rocm-rpm-macros%{pkg_suffix}
|
||||
|
||||
%if %{with check} || %{with test}
|
||||
BuildRequires: gtest-devel
|
||||
BuildRequires: rocminfo%{pkg_suffix}
|
||||
%endif
|
||||
|
||||
# Only headers, cmake infra but noarch confuses the libdir
|
||||
ExclusiveArch: x86_64 riscv64
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(rocprim)
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: ninja
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-llvm-macros
|
||||
|
||||
%description
|
||||
hipCUB is a thin wrapper library on top of rocPRIM or CUB. It enables developers
|
||||
to port a project using the CUB library to the HIP layer to run on AMD hardware.
|
||||
In the ROCm environment, hipCUB uses the rocPRIM library as the backend.
|
||||
|
||||
%package devel
|
||||
Summary: The %{upstreamname} development package
|
||||
Provides: %{name}-static = %{version}-%{release}
|
||||
Requires: rocprim%{pkg_suffix}-devel
|
||||
|
||||
%description devel
|
||||
The %{upstreamname} development package.
|
||||
hipCUB is a thin header-only wrapper library on top of rocPRIM which enables
|
||||
developers to render portable HIP code. Existing CUDA CUB source code can
|
||||
be recompiled in HIP using hipCUB.
|
||||
|
||||
%if %{with test}
|
||||
%package test
|
||||
Summary: Self-tests for %{name}
|
||||
Requires: %{name}-devel%{?_isa} = %{version}-%{release}
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
Precompiled self-tests for %{name}
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%prep
|
||||
%if %{with gitcommit}
|
||||
%setup -q -n rocm-libraries-%{commit0}
|
||||
cd projects/hipcub
|
||||
%else
|
||||
%autosetup -p1 -n %{upstreamname}
|
||||
%endif
|
||||
%prep -a
|
||||
# Fix cmake install lib directory
|
||||
sed -i -e 's/ROCM_INSTALL_LIBDIR lib/ROCM_INSTALL_LIBDIR %{_lib}/' \
|
||||
cmake/ROCMExportTargetsHeaderOnly.cmake
|
||||
|
||||
#
|
||||
# The ROCMExportTargetsHeaderOnly.cmake file
|
||||
# generates files that reference the install location of other files
|
||||
# Make this change so they match
|
||||
sed -i -e 's/ROCM_INSTALL_LIBDIR lib/ROCM_INSTALL_LIBDIR %{pkg_libdir}/' cmake/ROCMExportTargetsHeaderOnly.cmake
|
||||
%install -a
|
||||
rm -f %{buildroot}/%{_datadir}/doc/hipcub/LICENSE.txt
|
||||
|
||||
%build
|
||||
%if %{with gitcommit}
|
||||
cd projects/hipcub
|
||||
%endif
|
||||
|
||||
|
||||
%if %{with check}
|
||||
# Building all the gpu's does not make sense
|
||||
# Build only the first one, this only works well with rpmbuild.
|
||||
gpu=`rocm_agent_enumerator | head -n 1`
|
||||
%endif
|
||||
|
||||
%cmake \
|
||||
-DCMAKE_C_COMPILER=%rocmllvm_bindir/clang \
|
||||
-DCMAKE_CXX_COMPILER=%rocmllvm_bindir/clang++ \
|
||||
-DCMAKE_INSTALL_LIBDIR=%{pkg_libdir} \
|
||||
-DCMAKE_INSTALL_PREFIX=%{pkg_prefix} \
|
||||
-DCMAKE_LINKER=%rocmllvm_bindir/ld.lld \
|
||||
-DCMAKE_AR=%rocmllvm_bindir/llvm-ar \
|
||||
-DCMAKE_RANLIB=%rocmllvm_bindir/llvm-ranlib \
|
||||
-DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
|
||||
-DBUILD_TEST=%{build_test} \
|
||||
%if %{with check}
|
||||
-DAMDGPU_TARGETS=${gpu} \
|
||||
%else
|
||||
-DAMDGPU_TARGETS=${gpu_list} \
|
||||
%endif
|
||||
-DCMAKE_PREFIX_PATH=%{rocmllvm_cmakedir}/.. \
|
||||
-DROCM_SYMLINK_LIBS=OFF
|
||||
|
||||
%cmake_build
|
||||
|
||||
%install
|
||||
%if %{with gitcommit}
|
||||
cd projects/hipcub
|
||||
%endif
|
||||
|
||||
%cmake_install
|
||||
|
||||
# Extra license
|
||||
rm -f %{buildroot}%{pkg_prefix}/share/doc/hipcub/LICENSE.txt
|
||||
|
||||
%if %{with check}
|
||||
%check
|
||||
%ctest
|
||||
%endif
|
||||
|
||||
%files devel
|
||||
%if %{with gitcommit}
|
||||
%doc projects/hipcub/README.md
|
||||
%license projects/hipcub/LICENSE.txt
|
||||
%else
|
||||
%files
|
||||
%doc README.md
|
||||
%license LICENSE.txt
|
||||
%endif
|
||||
%{pkg_prefix}/include/hipcub
|
||||
%{pkg_prefix}/%{pkg_libdir}/cmake/hipcub
|
||||
%{_includedir}/hipcub/
|
||||
%{_libdir}/cmake/hipcub/
|
||||
|
||||
%if %{with test}
|
||||
%files test
|
||||
%{pkg_prefix}/bin/test_*
|
||||
%{pkg_prefix}/bin/hipcub/
|
||||
%{_bindir}/test_*
|
||||
%{_bindir}/hipcub/
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Mon Feb 9 2026 Yifan Xu <xuyifan@iscas.ac.cn> - 7.1.1-1
|
||||
- Import from upstream
|
||||
%autochangelog
|
||||
|
||||
+65
-184
@@ -1,224 +1,105 @@
|
||||
%bcond gitcommit 0
|
||||
%if %{with gitcommit}
|
||||
%global commit0 2584e35062ad9c2edb68d93c464cf157bc57e3b0
|
||||
%global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
|
||||
%global date0 20250926
|
||||
%endif
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%bcond test 1
|
||||
|
||||
%global upstreamname hipfft
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
%bcond compat 0
|
||||
%if %{with compat}
|
||||
%global pkg_libdir lib
|
||||
%global pkg_prefix %{_prefix}/lib64/rocm/rocm-%{rocm_release}
|
||||
%global pkg_suffix -%{rocm_release}
|
||||
%global pkg_module rocm%{pkg_suffix}
|
||||
%else
|
||||
%global pkg_libdir %{_lib}
|
||||
%global pkg_prefix %{_prefix}
|
||||
%global pkg_suffix %{nil}
|
||||
%global pkg_module default
|
||||
%endif
|
||||
%global hipfft_name hipfft%{pkg_suffix}
|
||||
# rocm stack builds with clang
|
||||
%global toolchain clang
|
||||
|
||||
%global toolchain rocm
|
||||
# hipcc does not support some clang flags
|
||||
%global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host -fcf-protection/' -e 's/-mtls-dialect=gnu2//' -e 's/-flto=thin//' )
|
||||
|
||||
%global _lto_cflags %{nil}
|
||||
|
||||
%bcond debug 0
|
||||
%if %{with debug}
|
||||
%global build_type DEBUG
|
||||
%else
|
||||
%global build_type RelWithDebInfo
|
||||
%endif
|
||||
|
||||
%bcond test 0
|
||||
%if %{with test}
|
||||
%global build_test ON
|
||||
%global __brp_check_rpaths %{nil}
|
||||
%else
|
||||
%global build_test OFF
|
||||
%endif
|
||||
|
||||
# Option to test suite for testing on real HW:
|
||||
# May have to set gpu under test with
|
||||
# export HIP_VISIBLE_DEVICES=<num> - 0, 1 etc.
|
||||
%bcond check 0
|
||||
|
||||
# Compression type and level for source/binary package payloads.
|
||||
# "w7T0.xzdio" xz level 7 using %%{getncpus} threads
|
||||
%global _source_payload w7T0.xzdio
|
||||
%global _binary_payload w7T0.xzdio
|
||||
|
||||
Name: %{hipfft_name}
|
||||
%if %{with gitcommit}
|
||||
Version: git%{date0}.%{shortcommit0}
|
||||
Release: 2%{?dist}
|
||||
%else
|
||||
Name: hipfft
|
||||
Version: %{rocm_version}
|
||||
Release: 1%{?dist}
|
||||
%endif
|
||||
Release: %autorelease
|
||||
Summary: ROCm FFT marshalling library
|
||||
Url: https://github.com/ROCm/rocm-libraries
|
||||
VCS: git:https://github.com/ROCm/hipFFT.git
|
||||
License: MIT
|
||||
URL: https://github.com/ROCm/rocm-libraries
|
||||
|
||||
%if %{with gitcommit}
|
||||
Source0: %{url}/archive/%{commit0}/rocm-libraries-%{shortcommit0}.tar.gz
|
||||
%else
|
||||
Source0: %{url}/releases/download/rocm-%{version}/%{upstreamname}.tar.gz#/%{upstreamname}-%{version}.tar.gz
|
||||
%endif
|
||||
|
||||
# https://github.com/ROCm/rocm-libraries/issues/2400
|
||||
#!RemoteAsset: sha256:f6f0352b5f9ffe53c88cea5fa40572eef0c0c1e2e50dce6f85d2c68e47afc63e
|
||||
Source: %{url}/releases/download/rocm-%{version}/hipfft.tar.gz
|
||||
Patch1: 0001-hipfft-hipfftw-soversion.patch
|
||||
BuildSystem: cmake
|
||||
|
||||
ExclusiveArch: x86_64 riscv64
|
||||
BuildOption(conf): -G Ninja
|
||||
BuildOption(conf): -DGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
BuildOption(conf): -DBUILD_CLIENTS_TESTS=ON
|
||||
BuildOption(conf): -DBUILD_CLIENTS_TESTS_OPENMP=OFF
|
||||
|
||||
BuildRequires: llvm
|
||||
BuildRequires: llvm-devel
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-devel
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: clang-tools-extra-devel
|
||||
BuildRequires: lld
|
||||
BuildRequires: lld-devel
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: rocm-device-libs
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: rocm-cmake%{pkg_suffix}
|
||||
BuildRequires: rocm-comgr%{pkg_suffix}-devel
|
||||
BuildRequires: rocm-llvm%{pkg_suffix}-macros
|
||||
BuildRequires: rocm-hip%{pkg_suffix}-devel
|
||||
BuildRequires: rocr-runtime%{pkg_suffix}-devel
|
||||
#BuildRequires: rocm-rpm-macros%{pkg_suffix}
|
||||
BuildRequires: rocprim%{pkg_suffix}-static
|
||||
BuildRequires: rocfft%{pkg_suffix}-devel
|
||||
|
||||
%if %{with test}
|
||||
BuildRequires: boost-devel
|
||||
BuildRequires: fftw-devel
|
||||
BuildRequires: hiprand%{pkg_suffix}-devel
|
||||
BuildRequires: rocrand%{pkg_suffix}-devel
|
||||
|
||||
BuildRequires: gtest-devel
|
||||
%endif
|
||||
|
||||
Provides: hipfft%{pkg_suffix} = %{version}-%{release}
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: cmake
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(GTest)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hiprand)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(rocfft)
|
||||
BuildRequires: cmake(rocrand)
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: ninja
|
||||
BuildRequires: pkgconfig(fftw3)
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-device-libs
|
||||
BuildRequires: rocm-llvm-macros
|
||||
|
||||
%description
|
||||
hipFFT is an FFT marshalling library. Currently, hipFFT supports
|
||||
the rocFFT backends
|
||||
hipFFT is an FFT marshalling library. Currently, hipFFT supports either
|
||||
rocFFT or cuFFT as backends. hipFFT exports an interface that does not
|
||||
require the client to change, regardless of the chosen backend.
|
||||
|
||||
hipFFT exports an interface that does not require the client to
|
||||
change, regardless of the chosen backend. It sits between the
|
||||
application and the backend FFT library, marshalling inputs into
|
||||
the backend and results back to the application.
|
||||
|
||||
%post -p /sbin/ldconfig
|
||||
%postun -p /sbin/ldconfig
|
||||
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
%package devel
|
||||
Summary: The hipFFT development package
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
Provides: hipfft%{pkg_suffix}-devel = %{version}-%{release}
|
||||
Requires: cmake(rocfft)
|
||||
|
||||
%description devel
|
||||
%{summary}
|
||||
%description devel
|
||||
The hipFFT development package.
|
||||
|
||||
%if %{with test}
|
||||
%package test
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%prep
|
||||
%if %{with gitcommit}
|
||||
%setup -q -n rocm-libraries-%{commit0}
|
||||
cd projects/hipfft
|
||||
%else
|
||||
%autosetup -p1 -n %{upstreamname}
|
||||
%endif
|
||||
|
||||
%prep -a
|
||||
# CMake Error at clients/tests/CMakeLists.txt:87 (find_package):
|
||||
# No "FindHIP.cmake" found in CMAKE_MODULE_PATH.
|
||||
# Remove MODULE
|
||||
sed -i -e 's@find_package( HIP MODULE REQUIRED )@find_package( HIP REQUIRED )@' clients/tests/CMakeLists.txt
|
||||
sed -i -e 's@find_package( HIP MODULE REQUIRED )@find_package( HIP REQUIRED )@' \
|
||||
clients/tests/CMakeLists.txt
|
||||
|
||||
%build
|
||||
%if %{with gitcommit}
|
||||
cd projects/hipfft
|
||||
%endif
|
||||
%install -a
|
||||
rm -f %{buildroot}/%{_datadir}/doc/hipfft/LICENSE.md
|
||||
|
||||
%cmake \
|
||||
-DCMAKE_C_COMPILER=%rocmllvm_bindir/clang \
|
||||
-DCMAKE_CXX_COMPILER=%rocmllvm_bindir/clang++ \
|
||||
-DCMAKE_INSTALL_LIBDIR=%{pkg_libdir} \
|
||||
-DCMAKE_INSTALL_PREFIX=%{pkg_prefix} \
|
||||
-DCMAKE_LINKER=%rocmllvm_bindir/ld.lld \
|
||||
-DCMAKE_AR=%rocmllvm_bindir/llvm-ar \
|
||||
-DCMAKE_RANLIB=%rocmllvm_bindir/llvm-ranlib \
|
||||
-DCMAKE_BUILD_TYPE=%{build_type} \
|
||||
-DCMAKE_PREFIX_PATH=%{rocmllvm_cmakedir}/.. \
|
||||
-DCMAKE_SKIP_RPATH=ON \
|
||||
-DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
|
||||
-DAMDGPU_TARGETS=%{rocm_gpu_list_default} \
|
||||
-DBUILD_CLIENTS_TESTS=%{build_test} \
|
||||
-DBUILD_CLIENTS_TESTS_OPENMP=OFF \
|
||||
-DROCM_SYMLINK_LIBS=OFF \
|
||||
-DHIP_PLATFORM=amd
|
||||
|
||||
%cmake_build
|
||||
|
||||
|
||||
%install
|
||||
%if %{with gitcommit}
|
||||
cd projects/hipfft
|
||||
%endif
|
||||
%cmake_install
|
||||
|
||||
rm -f %{buildroot}%{pkg_prefix}/share/doc/hipfft/LICENSE.md
|
||||
|
||||
%check
|
||||
%if %{with test}
|
||||
%if %{with check}
|
||||
export LD_LIBRARY_PATH=%{_vpath_builddir}/library:$LD_LIBRARY_PATH
|
||||
%{_vpath_builddir}/clients/staging/hipfft-test
|
||||
%endif
|
||||
%check -p
|
||||
export LD_LIBRARY_PATH=$PWD/%{__cmake_builddir}/library:$LD_LIBRARY_PATH
|
||||
%endif
|
||||
|
||||
%files
|
||||
%if %{with gitcommit}
|
||||
%license projects/hipfft/LICENSE.md
|
||||
%doc projects/hipfft/README.md
|
||||
%else
|
||||
%license LICENSE.md
|
||||
%doc README.md
|
||||
%endif
|
||||
|
||||
%{pkg_prefix}/%{pkg_libdir}/libhipfft.so.0{,.*}
|
||||
%{pkg_prefix}/%{pkg_libdir}/libhipfftw.so.0{,.*}
|
||||
%license LICENSE.md
|
||||
%{_libdir}/libhipfft.so.0{,.*}
|
||||
%{_libdir}/libhipfftw.so.0{,.*}
|
||||
|
||||
%files devel
|
||||
%{pkg_prefix}/include/hipfft/
|
||||
%{pkg_prefix}/%{pkg_libdir}/libhipfft.so
|
||||
%{pkg_prefix}/%{pkg_libdir}/libhipfftw.so
|
||||
%{pkg_prefix}/%{pkg_libdir}/cmake/hipfft/
|
||||
%{_includedir}/hipfft/
|
||||
%{_libdir}/cmake/hipfft/
|
||||
%{_libdir}/libhipfft.so
|
||||
%{_libdir}/libhipfftw.so
|
||||
|
||||
%if %{with test}
|
||||
%files test
|
||||
%{pkg_prefix}/bin/hipfft-test
|
||||
%endif
|
||||
%{_bindir}/hipfft-test
|
||||
|
||||
%changelog
|
||||
* Mon Feb 9 2026 Yifan Xu <xuyifan@iscas.ac.cn> - 7.1.1-1
|
||||
- Import from upstream
|
||||
%autochangelog
|
||||
|
||||
+55
-175
@@ -1,107 +1,52 @@
|
||||
%bcond gitcommit 0
|
||||
%if %{with gitcommit}
|
||||
%global commit0 2584e35062ad9c2edb68d93c464cf157bc57e3b0
|
||||
%global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
|
||||
%global date0 20250926
|
||||
%endif
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
# HIP error 100: no ROCm-capable device is detected
|
||||
# hipRAND needs a GPU to run tests, but we could still
|
||||
# keep the test cases for packagers who have a GPU, so make it optional.
|
||||
%bcond run_test 0
|
||||
|
||||
%global upstreamname hiprand
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
%bcond compat 0
|
||||
%if %{with compat}
|
||||
%global pkg_libdir lib
|
||||
%global pkg_prefix %{_prefix}/lib64/rocm/rocm-%{rocm_release}
|
||||
%global pkg_suffix -%{rocm_release}
|
||||
%global pkg_module rocm%{pkg_suffix}
|
||||
%else
|
||||
%global pkg_libdir %{_lib}
|
||||
%global pkg_prefix %{_prefix}
|
||||
%global pkg_suffix %{nil}
|
||||
%global pkg_module default
|
||||
%endif
|
||||
%global hiprand_name hiprand%{pkg_suffix}
|
||||
# rocm stack builds with clang
|
||||
%global toolchain clang
|
||||
|
||||
%global toolchain rocm
|
||||
# hipcc does not support some clang flags
|
||||
%global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host -fcf-protection/' -e 's/-mtls-dialect=gnu2//')
|
||||
|
||||
%bcond debug 0
|
||||
%if %{with debug}
|
||||
%global build_type DEBUG
|
||||
%else
|
||||
%global build_type RelWithDebInfo
|
||||
%endif
|
||||
|
||||
%bcond test 0
|
||||
%if %{with test}
|
||||
%global build_test ON
|
||||
%else
|
||||
%global build_test OFF
|
||||
%endif
|
||||
|
||||
# Option to test suite for testing on real HW:
|
||||
%bcond check 0
|
||||
# For docs
|
||||
%bcond doc 0
|
||||
|
||||
# Compression type and level for source/binary package payloads.
|
||||
# "w7T0.xzdio" xz level 7 using %%{getncpus} threads
|
||||
%global _source_payload w7T0.xzdio
|
||||
%global _binary_payload w7T0.xzdio
|
||||
|
||||
Name: %{hiprand_name}
|
||||
%if %{with gitcommit}
|
||||
Version: git%{date0}.%{shortcommit0}
|
||||
Release: 3%{?dist}
|
||||
%else
|
||||
Name: hiprand
|
||||
Version: %{rocm_version}
|
||||
Release: 1%{?dist}
|
||||
%endif
|
||||
Release: %autorelease
|
||||
Summary: HIP random number generator
|
||||
License: MIT AND BSD-3-Clause
|
||||
URL: https://github.com/ROCm/rocm-libraries
|
||||
%if %{with gitcommit}
|
||||
Source0: %{url}/archive/%{commit0}/rocm-libraries-%{shortcommit0}.tar.gz
|
||||
%else
|
||||
Source0: %{url}/releases/download/rocm-%{version}/%{upstreamname}.tar.gz#/%{upstreamname}-%{version}.tar.gz
|
||||
%endif
|
||||
Url: https://github.com/ROCm/rocm-libraries
|
||||
#!RemoteAsset: sha256:41e4053a3c16ea4bdc6e94fff428d8ffe7279e9cfa7ec142afc50169aae2c1f8
|
||||
Source: %{url}/releases/download/rocm-%{version}/hiprand.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
BuildOption(conf): -G Ninja
|
||||
BuildOption(conf): -DCMAKE_VERBOSE_MAKEFILE=ON
|
||||
BuildOption(conf): -DAMDGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
BuildOption(conf): -DBUILD_TEST=ON
|
||||
|
||||
BuildRequires: llvm
|
||||
BuildRequires: llvm-devel
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-devel
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: clang-tools-extra-devel
|
||||
BuildRequires: lld
|
||||
BuildRequires: lld-devel
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: rocm-device-libs
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: rocm-cmake%{pkg_suffix}
|
||||
BuildRequires: rocm-comgr%{pkg_suffix}-devel
|
||||
BuildRequires: rocm-llvm%{pkg_suffix}-macros
|
||||
BuildRequires: rocm-hip%{pkg_suffix}-devel
|
||||
#BuildRequires: rocm-rpm-macros%{pkg_suffix}
|
||||
BuildRequires: rocr-runtime%{pkg_suffix}-devel
|
||||
BuildRequires: rocrand%{pkg_suffix}-devel
|
||||
|
||||
%if %{with test}
|
||||
BuildRequires: gtest-devel
|
||||
%endif
|
||||
|
||||
%if %{with doc}
|
||||
BuildRequires: doxygen
|
||||
%endif
|
||||
|
||||
Provides: hiprand%{pkg_suffix} = %{version}-%{release}
|
||||
|
||||
ExclusiveArch: x86_64 riscv64
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(GTest)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(rocrand)
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: ninja
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-device-libs
|
||||
BuildRequires: rocm-llvm-macros
|
||||
|
||||
%description
|
||||
hipRAND is a RAND marshalling library, with multiple supported backends. It
|
||||
@@ -110,113 +55,48 @@ into the backend and results back to the application. hipRAND exports an
|
||||
interface that does not require the client to change, regardless of the chosen
|
||||
backend. Currently, hipRAND supports either rocRAND or cuRAND.
|
||||
|
||||
%post -p /sbin/ldconfig
|
||||
%postun -p /sbin/ldconfig
|
||||
|
||||
%package devel
|
||||
%package devel
|
||||
Summary: The hipRAND development package
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
Requires: rocrand%{pkg_suffix}-devel
|
||||
Provides: hiprand%{pkg_suffix}-devel = %{version}-%{release}
|
||||
Requires: cmake(rocrand)
|
||||
|
||||
%description devel
|
||||
%description devel
|
||||
The hipRAND development package.
|
||||
|
||||
%if %{with test}
|
||||
%package test
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%prep
|
||||
%if %{with gitcommit}
|
||||
%setup -q -n rocm-libraries-%{commit0}
|
||||
cd projects/hiprand
|
||||
%else
|
||||
%autosetup -p1 -n %{upstreamname}
|
||||
%endif
|
||||
|
||||
#Remove RPATH:
|
||||
%prep -a
|
||||
# Remove RPATH
|
||||
sed -i '/INSTALL_RPATH/d' CMakeLists.txt
|
||||
|
||||
# On Tumbleweed Q2,2025
|
||||
# /usr/include/gtest/internal/gtest-port.h:279:2: error: C++ versions less than C++14 are not supported.
|
||||
# 279 | #error C++ versions less than C++14 are not supported.
|
||||
# https://github.com/ROCm/hipRAND/issues/222
|
||||
# Convert the c++11's to c++14
|
||||
sed -i -e 's@set(CMAKE_CXX_STANDARD 11)@set(CMAKE_CXX_STANDARD 14)@' {,test/package/}CMakeLists.txt
|
||||
%install -a
|
||||
rm -f %{buildroot}%{_datadir}/doc/hiprand/LICENSE.md
|
||||
rm -f %{buildroot}%{_bindir}/hipRAND/CTestTestfile.cmake
|
||||
|
||||
%build
|
||||
%if %{with gitcommit}
|
||||
cd projects/hiprand
|
||||
%endif
|
||||
|
||||
%cmake \
|
||||
-DCMAKE_C_COMPILER=%rocmllvm_bindir/clang \
|
||||
-DCMAKE_CXX_COMPILER=%rocmllvm_bindir/clang++ \
|
||||
-DCMAKE_INSTALL_LIBDIR=%{pkg_libdir} \
|
||||
-DCMAKE_INSTALL_PREFIX=%{pkg_prefix} \
|
||||
-DCMAKE_LINKER=%rocmllvm_bindir/ld.lld \
|
||||
-DCMAKE_AR=%rocmllvm_bindir/llvm-ar \
|
||||
-DCMAKE_RANLIB=%rocmllvm_bindir/llvm-ranlib \
|
||||
-DCMAKE_BUILD_TYPE=%{build_type} \
|
||||
-DCMAKE_PREFIX_PATH=%{rocmllvm_cmakedir}/.. \
|
||||
-DCMAKE_SKIP_RPATH=ON \
|
||||
-DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
|
||||
-DAMDGPU_TARGETS=%{rocm_gpu_list_default} \
|
||||
-DBUILD_TEST=%{build_test} \
|
||||
-DROCM_SYMLINK_LIBS=OFF
|
||||
|
||||
%cmake_build
|
||||
|
||||
%install
|
||||
%if %{with gitcommit}
|
||||
cd projects/hiprand
|
||||
%endif
|
||||
%cmake_install
|
||||
|
||||
rm -f %{buildroot}%{pkg_prefix}/share/doc/hiprand/LICENSE.md
|
||||
rm -f %{buildroot}%{pkg_prefix}/bin/hipRAND/CTestTestfile.cmake
|
||||
%check -p
|
||||
export LD_LIBRARY_PATH=$PWD/%{__cmake_builddir}/library:$LD_LIBRARY_PATH
|
||||
|
||||
%if %{without run_test}
|
||||
%check
|
||||
%if %{with test}
|
||||
%if %{with check}
|
||||
|
||||
%ctest
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%files
|
||||
%if %{with gitcommit}
|
||||
%doc projects/hiprand/README.md
|
||||
%license projects/hiprand/LICENSE.md
|
||||
%else
|
||||
%doc README.md
|
||||
%license LICENSE.md
|
||||
%endif
|
||||
%if %{with debug}
|
||||
%{pkg_prefix}/%{pkg_libdir}/libhiprand-d.so.1{,.*}
|
||||
%else
|
||||
%{pkg_prefix}/%{pkg_libdir}/libhiprand.so.1{,.*}
|
||||
%endif
|
||||
%{_libdir}/libhiprand.so.1{,.*}
|
||||
|
||||
%files devel
|
||||
%{pkg_prefix}/include/hiprand/
|
||||
%if %{with debug}
|
||||
%{pkg_prefix}/%{pkg_libdir}/libhiprand-d.so
|
||||
%else
|
||||
%{pkg_prefix}/%{pkg_libdir}/libhiprand.so
|
||||
%endif
|
||||
%{pkg_prefix}/%{pkg_libdir}/cmake/hiprand/
|
||||
%{_includedir}/hiprand/
|
||||
%{_libdir}/cmake/hiprand/
|
||||
%{_libdir}/libhiprand.so
|
||||
|
||||
%if %{with test}
|
||||
%files test
|
||||
%{pkg_prefix}/bin/test*
|
||||
%endif
|
||||
%{_bindir}/test*
|
||||
|
||||
%changelog
|
||||
* Mon Feb 9 2026 Yifan Xu <xuyifan@iscas.ac.cn> - 7.1.1-1
|
||||
- Import from upstream
|
||||
%autochangelog
|
||||
|
||||
+82
-164
@@ -1,206 +1,124 @@
|
||||
%bcond gitcommit 0
|
||||
%if %{with gitcommit}
|
||||
%global commit0 2584e35062ad9c2edb68d93c464cf157bc57e3b0
|
||||
%global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
|
||||
%global date0 20250926
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
# TODO: hipSOLVER needs LAPACK to build its tests/benchmarks/samples
|
||||
# But openblas on openRuyi does not provide this
|
||||
%bcond build_test 0
|
||||
%if %{with build_test}
|
||||
%global cmake_test ON
|
||||
%else
|
||||
%global cmake_test OFF
|
||||
%endif
|
||||
|
||||
%global upstreamname hipSOLVER
|
||||
# hipSOLVER needs a GPU to run tests, but we could still
|
||||
# keep the test cases for packagers who have a GPU.
|
||||
%bcond run_test 0
|
||||
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
%bcond compat 0
|
||||
%if %{with compat}
|
||||
%global pkg_libdir lib
|
||||
%global pkg_prefix %{_prefix}/lib64/rocm/rocm-%{rocm_release}
|
||||
%global pkg_suffix -%{rocm_release}
|
||||
%global pkg_module rocm%{pkg_suffix}
|
||||
%else
|
||||
%global pkg_libdir %{_lib}
|
||||
%global pkg_prefix %{_prefix}
|
||||
%global pkg_suffix %{nil}
|
||||
%global pkg_module default
|
||||
%endif
|
||||
%global hipsolver_name hipsolver%{pkg_suffix}
|
||||
# rocm stack builds with clang
|
||||
%global toolchain clang
|
||||
|
||||
%global toolchain rocm
|
||||
# hipcc does not support some clang flags
|
||||
%global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host -fcf-protection/' -e 's/-mtls-dialect=gnu2//')
|
||||
|
||||
%bcond debug 0
|
||||
%if %{with debug}
|
||||
%global build_type DEBUG
|
||||
%else
|
||||
%global build_type RelWithDebInfo
|
||||
%endif
|
||||
|
||||
%bcond test 0
|
||||
%if %{with test}
|
||||
%global build_test ON
|
||||
%else
|
||||
%global build_test OFF
|
||||
%endif
|
||||
|
||||
# gfortran and clang rpm macros do not mix
|
||||
# Fortran is only used in testing
|
||||
# clang and gfortran fedora toolchain args do not mix
|
||||
%global build_fflags %{nil}
|
||||
|
||||
# Compression type and level for source/binary package payloads.
|
||||
# "w7T0.xzdio" xz level 7 using %%{getncpus} threads
|
||||
%global _source_payload w7T0.xzdio
|
||||
%global _binary_payload w7T0.xzdio
|
||||
|
||||
Name: %{hipsolver_name}
|
||||
%if %{with gitcommit}
|
||||
Version: git%{date0}.%{shortcommit0}
|
||||
Release: 3%{?dist}
|
||||
%else
|
||||
Name: hipsolver
|
||||
Version: %{rocm_version}
|
||||
Release: 1%{?dist}
|
||||
%endif
|
||||
Summary: ROCm SOLVER marshalling library
|
||||
Release: %autorelease
|
||||
Summary: ROCm SOLVER marshalling library (LAPACK)
|
||||
License: MIT
|
||||
Url: https://github.com/ROCm/hipSOLVER
|
||||
#!RemoteAsset: sha256:bd664e3cd43bfcc7e94d5a387c27262c4b218d6d2e71e086992b174349dd1c10
|
||||
Source: %{url}/archive/rocm-%{version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
%if %{with gitcommit}
|
||||
Url: https://github.com/ROCm/rocm-libraries
|
||||
Source0: %{url}/archive/%{commit0}/rocm-libraries-%{shortcommit0}.tar.gz
|
||||
%else
|
||||
Url: https://github.com/ROCm/%{upstreamname}
|
||||
Source0: %{url}/archive/rocm-%{rocm_version}.tar.gz#/%{upstreamname}-%{rocm_version}.tar.gz
|
||||
%endif
|
||||
BuildOption(conf): -G Ninja
|
||||
BuildOption(conf): -DBUILD_CLIENTS_TESTS=%{cmake_test}
|
||||
BuildOption(conf): -DBUILD_CLIENTS_BENCHMARKS=%{cmake_test}
|
||||
|
||||
BuildRequires: llvm
|
||||
BuildRequires: llvm-devel
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-devel
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: clang-tools-extra-devel
|
||||
BuildRequires: lld
|
||||
BuildRequires: lld-devel
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: rocm-device-libs
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(rocblas)
|
||||
BuildRequires: cmake(rocsolver)
|
||||
BuildRequires: cmake(rocsparse)
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: gcc-fortran
|
||||
BuildRequires: rocblas%{pkg_suffix}-devel
|
||||
BuildRequires: rocm-cmake%{pkg_suffix}
|
||||
BuildRequires: rocm-comgr%{pkg_suffix}-devel
|
||||
BuildRequires: rocm-llvm%{pkg_suffix}-macros
|
||||
BuildRequires: rocm-hip%{pkg_suffix}-devel
|
||||
BuildRequires: rocr-runtime%{pkg_suffix}-devel
|
||||
#BuildRequires: rocm-rpm-macros%{pkg_suffix}
|
||||
BuildRequires: rocsolver%{pkg_suffix}-devel
|
||||
BuildRequires: rocsparse%{pkg_suffix}-devel
|
||||
|
||||
%if %{with test}
|
||||
BuildRequires: gtest-devel
|
||||
BuildRequires: hipsparse%{pkg_suffix}-devel
|
||||
BuildRequires: blas-static
|
||||
BuildRequires: lapack-static
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: ninja
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-device-libs
|
||||
BuildRequires: rocm-llvm-macros
|
||||
%if %{with build_test}
|
||||
BuildRequires: cmake(GTest)
|
||||
BuildRequires: cmake(hipsparse)
|
||||
BuildRequires: pkgconfig(openblas)
|
||||
%endif
|
||||
|
||||
Provides: hipsolver%{pkg_suffix} = %{version}-%{release}
|
||||
|
||||
ExclusiveArch: x86_64 riscv64
|
||||
|
||||
%description
|
||||
hipSOLVER is a LAPACK marshalling library, with multiple supported
|
||||
backends. It sits between the application and a 'worker'
|
||||
LAPACK library, marshalling inputs into the backend library and
|
||||
marshalling results back to the application. hipSOLVER exports an
|
||||
interface that does not require the client to change, regardless
|
||||
of the chosen backend. Currently, hipSOLVER supports rocSOLVER
|
||||
and cuSOLVER as backends.
|
||||
hipSOLVER is a LAPACK marshalling library, with multiple supported backends.
|
||||
It sits between the application and a "worker" SOLVER library, marshalling
|
||||
inputs into the backend library and results back to the application. hipSOLVER
|
||||
exports an interface that does not require the client to change, regardless of
|
||||
the chosen backend.
|
||||
|
||||
%post -p /sbin/ldconfig
|
||||
%postun -p /sbin/ldconfig
|
||||
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
%package devel
|
||||
Summary: The hipSOLVER development package
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
Provides: hipsolver%{pkg_suffix}-devel = %{version}-%{release}
|
||||
Requires: cmake(rocblas)
|
||||
Requires: cmake(rocsolver)
|
||||
Requires: cmake(rocsparse)
|
||||
|
||||
%description devel
|
||||
%{summary}
|
||||
%description devel
|
||||
The hipSOLVER development package.
|
||||
|
||||
%if %{with test}
|
||||
%package test
|
||||
%if %{with build_test}
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%prep
|
||||
%if %{with gitcommit}
|
||||
%setup -q -n rocm-libraries-%{commit0}
|
||||
cd projects/hipsolver
|
||||
%patch -P1 -p1
|
||||
%else
|
||||
%autosetup -p1 -n %{upstreamname}-rocm-%{version}
|
||||
%install -a
|
||||
rm -f %{buildroot}%{_datadir}/doc/hipsolver/LICENSE.md
|
||||
|
||||
%check -p
|
||||
export LD_LIBRARY_PATH=$PWD/%{__cmake_builddir}/library:$LD_LIBRARY_PATH
|
||||
|
||||
# Override the build system's default test phase with an empty one unless
# the packager explicitly enables run_test (the tests need a GPU).
%if %{without run_test}
|
||||
%check
|
||||
%endif
|
||||
|
||||
%build
|
||||
%if %{with gitcommit}
|
||||
cd projects/hipsolver
|
||||
%endif
|
||||
|
||||
%cmake \
|
||||
-DCMAKE_C_COMPILER=%rocmllvm_bindir/clang \
|
||||
-DCMAKE_CXX_COMPILER=%rocmllvm_bindir/clang++ \
|
||||
-DCMAKE_INSTALL_LIBDIR=%{pkg_libdir} \
|
||||
-DCMAKE_INSTALL_PREFIX=%{pkg_prefix} \
|
||||
-DCMAKE_LINKER=%rocmllvm_bindir/ld.lld \
|
||||
-DCMAKE_AR=%rocmllvm_bindir/llvm-ar \
|
||||
-DCMAKE_RANLIB=%rocmllvm_bindir/llvm-ranlib \
|
||||
-DCMAKE_BUILD_TYPE=%{build_type} \
|
||||
-DCMAKE_PREFIX_PATH=%{rocmllvm_cmakedir}/.. \
|
||||
-DCMAKE_SKIP_RPATH=ON \
|
||||
-DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
|
||||
-DROCM_SYMLINK_LIBS=OFF \
|
||||
-DHIP_PLATFORM=amd \
|
||||
-DAMDGPU_TARGETS=%{rocm_gpu_list_default} \
|
||||
-DBUILD_CLIENTS_TESTS=%{build_test}
|
||||
|
||||
%cmake_build
|
||||
|
||||
%install
|
||||
%if %{with gitcommit}
|
||||
cd projects/hipsolver
|
||||
%endif
|
||||
|
||||
%cmake_install
|
||||
|
||||
# Extra license
|
||||
rm -f %{buildroot}%{pkg_prefix}/share/doc/hipsolver/LICENSE.md
|
||||
|
||||
%files
|
||||
%if %{with gitcommit}
|
||||
%doc projects/hipsolver/README.md
|
||||
%license projects/hipsolver/LICENSE.md
|
||||
%else
|
||||
%doc README.md
|
||||
%license LICENSE.md
|
||||
%endif
|
||||
|
||||
%{pkg_prefix}/%{pkg_libdir}/libhipsolver.so.1{,.*}
|
||||
%{pkg_prefix}/%{pkg_libdir}/libhipsolver_fortran.so.1{,.*}
|
||||
%{_libdir}/libhipsolver.so.1{,.*}
|
||||
%{_libdir}/libhipsolver_fortran.so.1{,.*}
|
||||
|
||||
%files devel
|
||||
%{pkg_prefix}/include/hipsolver/
|
||||
%{pkg_prefix}/%{pkg_libdir}/libhipsolver.so
|
||||
%{pkg_prefix}/%{pkg_libdir}/libhipsolver_fortran.so
|
||||
%{pkg_prefix}/%{pkg_libdir}/cmake/hipsolver/
|
||||
%{_includedir}/hipsolver/
|
||||
%{_libdir}/libhipsolver.so
|
||||
%{_libdir}/libhipsolver_fortran.so
|
||||
%{_libdir}/cmake/hipsolver/
|
||||
|
||||
%if %{with test}
|
||||
%if %{with build_test}
|
||||
%files test
|
||||
%{pkg_prefix}/share/hipsolver/
|
||||
%{pkg_prefix}/bin/hipsolver*
|
||||
%{_datadir}/hipsolver/
|
||||
%{_bindir}/hipsolver*
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Mon Feb 2 2026 Yifan Xu <xuyifan@iscas.ac.cn> - 7.1.1-1
|
||||
- Import from upstream
|
||||
%autochangelog
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
From 4105bd51e654df8b40c53eb4b4abe7a3415783eb Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Fri, 31 Oct 2025 07:28:05 -0700
|
||||
Subject: [PATCH] hipsparse change test download dir
|
||||
|
||||
---
|
||||
clients/tests/CMakeLists.txt | 22 ++++++++++++----------
|
||||
1 file changed, 12 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/clients/tests/CMakeLists.txt b/clients/tests/CMakeLists.txt
|
||||
index 15a7c21f4c30..69beb9da6208 100644
|
||||
--- a/clients/tests/CMakeLists.txt
|
||||
+++ b/clients/tests/CMakeLists.txt
|
||||
@@ -27,11 +27,13 @@ find_package(GTest REQUIRED)
|
||||
#
|
||||
# Client matrices.
|
||||
#
|
||||
+option(CMAKE_MATRICES_DIR "Directory containing test matrices" ${PROJECT_BINARY_DIR}/matrices)
|
||||
+
|
||||
if(NOT EXISTS "${CMAKE_MATRICES_DIR}")
|
||||
#
|
||||
# Download.
|
||||
#
|
||||
- set(CMAKE_MATRICES_DIR ${PROJECT_BINARY_DIR}/matrices CACHE STRING "Matrices directory.")
|
||||
+
|
||||
file(MAKE_DIRECTORY ${CMAKE_MATRICES_DIR})
|
||||
|
||||
if(NOT TARGET hipsparse)
|
||||
@@ -46,15 +48,15 @@ else()
|
||||
#
|
||||
# Copy.
|
||||
#
|
||||
- if(NOT CMAKE_MATRICES_DIR STREQUAL "${PROJECT_BINARY_DIR}/matrices")
|
||||
- message("Copy matrix files from ${CMAKE_MATRICES_DIR} to ${PROJECT_BINARY_DIR}/matrices")
|
||||
-
|
||||
- execute_process(COMMAND cp -r ${CMAKE_MATRICES_DIR} ${PROJECT_BINARY_DIR}/matrices RESULT_VARIABLE STATUS WORKING_DIRECTORY ${CMAKE_MATRICES_DIR})
|
||||
-
|
||||
- if(STATUS AND NOT STATUS EQUAL 0)
|
||||
- message(FATAL_ERROR "Failed to copy matrix .bin files, aborting.")
|
||||
- endif()
|
||||
- endif()
|
||||
+ # if(NOT CMAKE_MATRICES_DIR STREQUAL "${PROJECT_BINARY_DIR}/matrices")
|
||||
+ # message("Copy matrix files from ${CMAKE_MATRICES_DIR} to ${PROJECT_BINARY_DIR}/matrices")
|
||||
+ #
|
||||
+ # execute_process(COMMAND cp -r ${CMAKE_MATRICES_DIR} ${PROJECT_BINARY_DIR}/matrices RESULT_VARIABLE STATUS WORKING_DIRECTORY ${CMAKE_MATRICES_DIR})
|
||||
+ #
|
||||
+ # if(STATUS AND NOT STATUS EQUAL 0)
|
||||
+ # message(FATAL_ERROR "Failed to copy matrix .bin files, aborting.")
|
||||
+ # endif()
|
||||
+ # endif()
|
||||
|
||||
endif()
|
||||
|
||||
--
|
||||
2.51.0
|
||||
|
||||
+76
-208
@@ -1,129 +1,66 @@
|
||||
%bcond gitcommit 0
|
||||
%if %{with gitcommit}
|
||||
%global commit0 2584e35062ad9c2edb68d93c464cf157bc57e3b0
|
||||
%global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
|
||||
%global date0 20250926
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
# hipSPARSE needs to download about 19 test matrices
|
||||
# It would be too verbose to add them all as Source entries and to the %%prep section
|
||||
%bcond build_test 0
|
||||
%if %{with build_test}
|
||||
%global cmake_test ON
|
||||
%else
|
||||
%global cmake_test OFF
|
||||
%endif
|
||||
|
||||
%global upstreamname hipsparse
|
||||
# hipSPARSE needs a GPU to run tests, but we could still
|
||||
# keep the test cases for packagers who have a GPU.
|
||||
%bcond run_test 0
|
||||
|
||||
# This ROCm package is built with clang by default
|
||||
%global toolchain clang
|
||||
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
%bcond compat 0
|
||||
%if %{with compat}
|
||||
%global pkg_libdir lib
|
||||
%global pkg_prefix %{_prefix}/lib64/rocm/rocm-%{rocm_release}
|
||||
%global pkg_suffix -%{rocm_release}
|
||||
%global pkg_module rocm%{pkg_suffix}
|
||||
%else
|
||||
%global pkg_libdir %{_lib}
|
||||
%global pkg_prefix %{_prefix}
|
||||
%global pkg_suffix %{nil}
|
||||
%global pkg_module default
|
||||
%endif
|
||||
%global hipsparse_name hipsparse%{pkg_suffix}
|
||||
|
||||
%global toolchain rocm
|
||||
# hipcc does not support some clang flags
|
||||
%global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host -fcf-protection/' -e 's/-mtls-dialect=gnu2//')
|
||||
|
||||
%bcond debug 0
|
||||
%if %{with debug}
|
||||
%global build_type DEBUG
|
||||
%else
|
||||
%global build_type RelWithDebInfo
|
||||
%endif
|
||||
|
||||
# export an llvm compilation database
|
||||
# Useful for input for other llvm tools
|
||||
%bcond export 0
|
||||
%if %{with export}
|
||||
%global build_compile_db ON
|
||||
%else
|
||||
%global build_compile_db OFF
|
||||
%endif
|
||||
|
||||
# downloads tests, use mock --enable-network
|
||||
%bcond test 0
|
||||
%if %{with test}
|
||||
%global build_test ON
|
||||
%global __brp_check_rpaths %{nil}
|
||||
%else
|
||||
%global build_test OFF
|
||||
%endif
|
||||
|
||||
%bcond check 0
|
||||
|
||||
# gfortran and clang rpm macros do not mix
|
||||
%global build_fflags %{nil}
|
||||
|
||||
# Compression type and level for source/binary package payloads.
|
||||
# "w7T0.xzdio" xz level 7 using %%{getncpus} threads
|
||||
%global _source_payload w7T0.xzdio
|
||||
%global _binary_payload w7T0.xzdio
|
||||
|
||||
Name: %{hipsparse_name}
|
||||
%if %{with gitcommit}
|
||||
Version: git%{date0}.%{shortcommit0}
|
||||
Release: 1%{?dist}
|
||||
%else
|
||||
Name: hipsparse
|
||||
Version: %{rocm_version}
|
||||
Release: 1%{?dist}
|
||||
%endif
|
||||
Release: %autorelease
|
||||
Summary: ROCm SPARSE marshalling library
|
||||
License: MIT
|
||||
URL: https://github.com/ROCm/rocm-libraries
|
||||
Url: https://github.com/ROCm/hipSPARSE
|
||||
#!RemoteAsset: sha256:b001834d8e65c3878d1a69d08803d5b6ce4fe623e78099fe51cb146d0ffa10e7
|
||||
Source: %{url}/archive/rocm-%{version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
%if %{with gitcommit}
|
||||
Source0: %{url}/archive/%{commit0}/rocm-libraries-%{shortcommit0}.tar.gz
|
||||
%else
|
||||
Source0: %{url}/releases/download/rocm-%{version}/%{upstreamname}.tar.gz#/%{upstreamname}-%{version}.tar.gz
|
||||
%endif
|
||||
BuildOption(conf): -G Ninja
|
||||
BuildOption(conf): -DCMAKE_VERBOSE_MAKEFILE=ON
|
||||
BuildOption(conf): -DGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
BuildOption(conf): -DBUILD_CLIENTS_SAMPLES=OFF
|
||||
BuildOption(conf): -DBUILD_CLIENTS_BENCHMARKS=ON
|
||||
BuildOption(conf): -DBUILD_CLIENTS_TESTS=%{cmake_test}
|
||||
|
||||
Patch1: 0001-hipsparse-change-test-download-dir.patch
|
||||
|
||||
BuildRequires: llvm
|
||||
BuildRequires: llvm-devel
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-devel
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: clang-tools-extra-devel
|
||||
BuildRequires: lld
|
||||
BuildRequires: lld-devel
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: rocm-device-libs
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
%if %{with build_test}
|
||||
BuildRequires: cmake(GTest)
|
||||
%endif
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(rocprim)
|
||||
BuildRequires: cmake(rocsparse)
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: gcc-fortran
|
||||
BuildRequires: rocm-cmake%{pkg_suffix}
|
||||
BuildRequires: rocm-comgr%{pkg_suffix}-devel
|
||||
BuildRequires: rocm-llvm%{pkg_suffix}-macros
|
||||
BuildRequires: rocm-hip%{pkg_suffix}-devel
|
||||
BuildRequires: rocr-runtime%{pkg_suffix}-devel
|
||||
#BuildRequires: rocm-rpm-macros%{pkg_suffix}
|
||||
BuildRequires: rocprim%{pkg_suffix}-static
|
||||
BuildRequires: rocsparse%{pkg_suffix}-devel
|
||||
|
||||
%if %{with test}
|
||||
BuildRequires: gtest-devel
|
||||
BuildRequires: rocblas%{pkg_suffix}-devel
|
||||
%endif
|
||||
|
||||
%if %{with check}
|
||||
%if %{with export}
|
||||
BuildRequires: cppcheck
|
||||
BuildRequires: cppcheck-htmlreport
|
||||
BuildRequires: rocm-clang-analyzer%{pkg_suffix}
|
||||
BuildRequires: rocm-clang-tools-extra%{pkg_suffix}
|
||||
%endif
|
||||
%endif
|
||||
|
||||
Provides: hipsparse%{pkg_suffix} = %{version}-%{release}
|
||||
|
||||
ExclusiveArch: x86_64 riscv64
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: ninja
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-device-libs
|
||||
BuildRequires: rocm-llvm-macros
|
||||
|
||||
%description
|
||||
hipSPARSE is a SPARSE marshalling library with multiple
|
||||
@@ -135,126 +72,57 @@ require the client to change, regardless of the chosen
|
||||
backend. Currently, hipSPARSE supports rocSPARSE and
|
||||
cuSPARSE backends.
|
||||
|
||||
%post -p /sbin/ldconfig
|
||||
%postun -p /sbin/ldconfig
|
||||
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
%package benchmark
|
||||
Summary: Benchmark for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
Provides: hipsparse%{pkg_suffix}-devel = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
%description benchmark
|
||||
%{summary}
|
||||
|
||||
%if %{with test}
|
||||
%package test
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
%{summary}
|
||||
|
||||
%if %{with build_test}
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%prep
|
||||
%if %{with gitcommit}
|
||||
%setup -q -n rocm-libraries-%{commit0}
|
||||
cd projects/hiprand
|
||||
%patch -P1 -p1
|
||||
%else
|
||||
%autosetup -p1 -n %{upstreamname}
|
||||
%endif
|
||||
%install -a
|
||||
rm -f %{buildroot}%{_datadir}/doc/hipsparse/LICENSE.md
|
||||
|
||||
# A better default for the matrices dir
|
||||
sed -i -e 's@hipsparse_exepath() + "../matrices/"@"%{pkg_prefix}/share/hipsparse/matrices/"@' clients/include/utility.hpp
|
||||
%check -p
|
||||
export LD_LIBRARY_PATH=$PWD/%{__cmake_builddir}/library:$LD_LIBRARY_PATH
|
||||
|
||||
%build
|
||||
%if %{with gitcommit}
|
||||
cd projects/hipsparse
|
||||
%endif
|
||||
|
||||
%cmake \
|
||||
-DCMAKE_C_COMPILER=%rocmllvm_bindir/clang \
|
||||
-DCMAKE_CXX_COMPILER=%rocmllvm_bindir/clang++ \
|
||||
-DCMAKE_EXPORT_COMPILE_COMMANDS=%{build_compile_db} \
|
||||
-DCMAKE_INSTALL_LIBDIR=%{pkg_libdir} \
|
||||
-DCMAKE_INSTALL_PREFIX=%{pkg_prefix} \
|
||||
-DCMAKE_LINKER=%rocmllvm_bindir/ld.lld \
|
||||
-DCMAKE_AR=%rocmllvm_bindir/llvm-ar \
|
||||
-DCMAKE_RANLIB=%rocmllvm_bindir/llvm-ranlib \
|
||||
-DCMAKE_BUILD_TYPE=%build_type \
|
||||
-DCMAKE_PREFIX_PATH=%{rocmllvm_cmakedir}/.. \
|
||||
-DCMAKE_SKIP_RPATH=ON \
|
||||
-DROCM_SYMLINK_LIBS=OFF \
|
||||
-DHIP_PLATFORM=amd \
|
||||
-DGPU_TARGETS=%{rocm_gpu_list_default} \
|
||||
-DBUILD_CLIENTS_BENCHMARKS=%{build_test} \
|
||||
-DBUILD_CLIENTS_SAMPLES=OFF \
|
||||
-DBUILD_CLIENTS_TESTS=%{build_test} \
|
||||
-DBUILD_CLIENTS_TESTS_OPENMP=OFF \
|
||||
-DCMAKE_MATRICES_DIR=%{_builddir}/hipsparse-test-matrices/ \
|
||||
-DBUILD_FORTRAN_CLIENTS=OFF
|
||||
|
||||
%cmake_build
|
||||
|
||||
%if %{with check}
|
||||
%if %{without run_test}
|
||||
%check
|
||||
%if %{with export}
|
||||
json=`find . -name 'compile_commands.json'`
|
||||
json=`realpath $json`
|
||||
json_dir=`dirname $json`
|
||||
if [ -f ${json} ]; then
|
||||
jobs=`nproc`
|
||||
export PATH=%{rocmllvm_bindir}:$PATH
|
||||
output=/tmp/%{name}-tidy/
|
||||
mkdir -p ${output}
|
||||
# Use echo to consume tidy's error code
|
||||
%{rocmllvm_bindir}/run-clang-tidy -p ${json_dir} &> ${output}/tidy.log || echo "ran clang-tidy"
|
||||
|
||||
output=/tmp/%{name}-cppcheck/
|
||||
mkdir -p ${output}
|
||||
cppcheck --project=${json} -j ${jobs} --std=c++17 --safety --output-file=${output}/cppcheck.txt
|
||||
cppcheck --project=${json} -j ${jobs} --std=c++17 --safety --xml --output-file=${output}/cppcheck.xml
|
||||
cppcheck-htmlreport --file=${output}/cppcheck.xml --report-dir=${output}
|
||||
fi
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%install
|
||||
%if %{with gitcommit}
|
||||
cd projects/hipsparse
|
||||
%endif
|
||||
|
||||
%cmake_install
|
||||
|
||||
rm -f %{buildroot}%{pkg_prefix}/share/doc/hipsparse/LICENSE.md
|
||||
|
||||
%if %{with test}
|
||||
mkdir -p %{buildroot}/%{pkg_prefix}/share/hipsparse/matrices
|
||||
install -pm 644 %{_builddir}/%{name}-test-matrices/* %{buildroot}/%{pkg_prefix}/share/hipsparse/matrices
|
||||
%endif
|
||||
|
||||
%files
|
||||
%if %{with gitcommit}
|
||||
%doc projects/hipsparse/README.md
|
||||
%license projects/hipsparse/LICENSE.md
|
||||
%else
|
||||
%doc README.md
|
||||
%license LICENSE.md
|
||||
%endif
|
||||
%{_libdir}/libhipsparse.so.4{,.*}
|
||||
|
||||
%{pkg_prefix}/%{pkg_libdir}/libhipsparse.so.4{,.*}
|
||||
%files benchmark
|
||||
%{_bindir}/hipsparse-bench
|
||||
|
||||
%files devel
|
||||
%{pkg_prefix}/include/hipsparse/
|
||||
%{pkg_prefix}/%{pkg_libdir}/libhipsparse.so
|
||||
%{pkg_prefix}/%{pkg_libdir}/cmake/hipsparse/
|
||||
%{_includedir}/hipsparse/
|
||||
%{_libdir}/cmake/hipsparse/
|
||||
%{_libdir}/libhipsparse.so
|
||||
|
||||
%if %{with test}
|
||||
%if %{with build_test}
|
||||
%files test
|
||||
%{pkg_prefix}/bin/hipsparse*
|
||||
%{pkg_prefix}/share/hipsparse/
|
||||
%{_bindir}/hipsparse*
|
||||
%{_datadir}/hipsparse/
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Mon Feb 2 2026 Yifan Xu <xuyifan@iscas.ac.cn> - 7.1.1-1
|
||||
- Import from upstream
|
||||
%autochangelog
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
From 43c4a61c5d8836a16feb8e53c72f255790523ff3 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Mon, 3 Nov 2025 06:11:40 -0800
|
||||
Subject: [PATCH] hipblaslt find origami package
|
||||
|
||||
---
|
||||
CMakeLists.txt | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index dbccca92c84f..d02df50540c2 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -218,7 +218,7 @@ if(HIPBLASLT_ENABLE_MSGPACK)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
-add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../../shared/origami" origami)
|
||||
+find_package(origami CONFIG REQUIRED)
|
||||
add_subdirectory(tensilelite)
|
||||
|
||||
if(HIPBLASLT_ENABLE_HOST)
|
||||
--
|
||||
2.52.0
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
From c20b846b6d594464eccf865045ef0ef10384f407 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Thu, 25 Sep 2025 13:02:55 -0700
|
||||
Subject: [PATCH] hipblaslt tensilelite remove yappi dependency
|
||||
|
||||
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
|
||||
---
|
||||
tensilelite/Tensile/TensileCreateLibrary/Run.py | 15 ---------------
|
||||
tensilelite/requirements.txt | 2 +-
|
||||
2 files changed, 1 insertion(+), 16 deletions(-)
|
||||
|
||||
diff --git a/tensilelite/Tensile/TensileCreateLibrary/Run.py b/tensilelite/Tensile/TensileCreateLibrary/Run.py
|
||||
index f0bbe8acd127..fc076e6935e8 100644
|
||||
--- a/tensilelite/Tensile/TensileCreateLibrary/Run.py
|
||||
+++ b/tensilelite/Tensile/TensileCreateLibrary/Run.py
|
||||
@@ -231,12 +231,6 @@ def writeSolutionsAndKernels(
|
||||
generateSourcesAndExit=False,
|
||||
compress=True,
|
||||
):
|
||||
- if globalParameters["PythonProfile"]:
|
||||
- globalParameters["CpuThreads"] = 0
|
||||
- printWarning("Python profiling is enabled. CpuThreads set to 0.")
|
||||
- import yappi
|
||||
- yappi.start()
|
||||
-
|
||||
codeObjectFiles = []
|
||||
|
||||
outputPath = Path(outputPath)
|
||||
@@ -299,15 +293,6 @@ def writeSolutionsAndKernels(
|
||||
writeHelpers(outputPath, kernelHelperObjs, KERNEL_HELPER_FILENAME_CPP, KERNEL_HELPER_FILENAME_H)
|
||||
srcKernelFile = Path(outputPath) / "Kernels.cpp"
|
||||
|
||||
- if globalParameters["PythonProfile"]:
|
||||
- yappi.stop()
|
||||
- yappi.get_func_stats().save("yappi_results.profile", type="callgrind")
|
||||
- with open("yappi_results.txt", "w") as f:
|
||||
- yappi.get_func_stats().print_all(out=f)
|
||||
- if globalParameters["CpuThreads"] != 0:
|
||||
- with open("yappi_thread_stats.txt", "w") as f:
|
||||
- yappi.get_thread_stats().print_all(out=f)
|
||||
-
|
||||
if not generateSourcesAndExit:
|
||||
codeObjectFiles += buildAssemblyCodeObjectFiles(
|
||||
asmToolchain.linker,
|
||||
diff --git a/tensilelite/requirements.txt b/tensilelite/requirements.txt
|
||||
index 60c4c1144537..e87db8445411 100644
|
||||
--- a/tensilelite/requirements.txt
|
||||
+++ b/tensilelite/requirements.txt
|
||||
@@ -7,4 +7,4 @@ joblib>=1.1.1; python_version < '3.8'
|
||||
simplejson
|
||||
ujson
|
||||
orjson
|
||||
-yappi
|
||||
+
|
||||
--
|
||||
2.51.0
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
--- ./hipBLASLt/tensilelite/Tensile/Toolchain/Validators.py 2025-11-14 05:43:51
|
||||
+++ ./hipBLASLt/tensilelite/Tensile/Toolchain/Validators.py.mod 2026-03-04 16:10:09
|
||||
@@ -114,11 +114,11 @@
|
||||
|
||||
|
||||
class ToolchainDefaults(NamedTuple):
|
||||
- CXX_COMPILER = osSelect(linux="amdclang++", windows="clang++.exe")
|
||||
- C_COMPILER = osSelect(linux="amdclang", windows="clang.exe")
|
||||
+ CXX_COMPILER = osSelect(linux="clang++", windows="clang++.exe")
|
||||
+ C_COMPILER = osSelect(linux="clang", windows="clang.exe")
|
||||
OFFLOAD_BUNDLER = osSelect(linux="clang-offload-bundler", windows="clang-offload-bundler.exe")
|
||||
DEVICE_ENUMERATOR = osSelect(linux="rocm_agent_enumerator" if isRhel8() else "amdgpu-arch", windows="hipinfo")
|
||||
- ASSEMBLER = osSelect(linux="amdclang++", windows="clang++.exe")
|
||||
+ ASSEMBLER = osSelect(linux="clang++", windows="clang++.exe")
|
||||
HIP_CONFIG = osSelect(linux="hipconfig", windows="hipconfig.exe")
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
From 1ac117ac0591a0f1bb67c34f537354c21412b2d8 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Sat, 1 Nov 2025 09:43:58 -0700
|
||||
Subject: [PATCH] hipblaslt tensilelite use fedora paths
|
||||
|
||||
---
|
||||
tensilelite/Tensile/Common/GlobalParameters.py | 2 +-
|
||||
tensilelite/Tensile/Toolchain/Validators.py | 4 ++--
|
||||
2 files changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/tensilelite/Tensile/Common/GlobalParameters.py b/tensilelite/Tensile/Common/GlobalParameters.py
|
||||
index 567188da59bd..1f8037c183a6 100644
|
||||
--- a/tensilelite/Tensile/Common/GlobalParameters.py
|
||||
+++ b/tensilelite/Tensile/Common/GlobalParameters.py
|
||||
@@ -538,7 +538,7 @@ def assignGlobalParameters(config, isaInfoMap: Dict[IsaVersion, IsaInfo]):
|
||||
else:
|
||||
print2(" %24s: %8s (unspecified)" % (key, defaultValue))
|
||||
|
||||
- globalParameters["ROCmPath"] = "/opt/rocm"
|
||||
+ globalParameters["ROCmPath"] = "/usr"
|
||||
if "ROCM_PATH" in os.environ:
|
||||
globalParameters["ROCmPath"] = os.environ.get("ROCM_PATH")
|
||||
if "TENSILE_ROCM_PATH" in os.environ:
|
||||
diff --git a/tensilelite/Tensile/Toolchain/Validators.py b/tensilelite/Tensile/Toolchain/Validators.py
|
||||
index fd5dab5324c0..3ce024d31f52 100644
|
||||
--- a/tensilelite/Tensile/Toolchain/Validators.py
|
||||
+++ b/tensilelite/Tensile/Toolchain/Validators.py
|
||||
@@ -30,8 +30,8 @@ from typing import List, NamedTuple, Union
|
||||
|
||||
from Tensile.Common.Utilities import isRhel8
|
||||
|
||||
-DEFAULT_ROCM_BIN_PATH_POSIX = Path("/opt/rocm/bin")
|
||||
-DEFAULT_ROCM_LLVM_BIN_PATH_POSIX = Path("/opt/rocm/lib/llvm/bin")
|
||||
+DEFAULT_ROCM_BIN_PATH_POSIX = Path("/usr/bin")
|
||||
+DEFAULT_ROCM_LLVM_BIN_PATH_POSIX = Path("/usr/lib64/rocm/llvm/bin")
|
||||
DEFAULT_ROCM_BIN_PATH_WINDOWS = Path("C:/Program Files/AMD/ROCm")
|
||||
|
||||
|
||||
--
|
||||
2.52.0
|
||||
|
||||
@@ -0,0 +1,205 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
%global toolchain clang
|
||||
|
||||
%global tensile_version 4.33.0
|
||||
%global tensile_verbose 1
|
||||
|
||||
# Build conditional for the client test suite. The default value 0 leaves it
# disabled; it can be switched on with "--with build_test" at build time.
# cmake_test is the ON/OFF spelling of the same switch, handed to the
# BUILD_CLIENTS_TESTS configure option further down.
%bcond build_test 0
|
||||
%if %{with build_test}
|
||||
%global cmake_test ON
|
||||
%else
|
||||
%global cmake_test OFF
|
||||
%endif
|
||||
|
||||
Name: hipsparselt
|
||||
Version: %{rocm_version}
|
||||
Release: %autorelease
|
||||
Summary: A SPARSE marshaling library
|
||||
License: MIT
|
||||
URL: https://github.com/ROCm/rocm-libraries
|
||||
#!RemoteAsset: sha256:7672d1ac94d2694999b6937d19f5e92e67fb844eea394b4e8525c531fd1acd8c
|
||||
Source0: %{url}/releases/download/rocm-%{version}/%{name}.tar.gz
|
||||
#!RemoteAsset: sha256:05d73038b1b4f66f3df4eb595b7cb0c8935f7aa18d0e07dbe5cc740a4b691898
|
||||
Source1: %{url}/releases/download/rocm-%{version}/hipblaslt.tar.gz
|
||||
# Patches for hipBLASLt's tensilelite (applied during prep inside hipBLASLt/)
|
||||
Source2: 0001-hipblaslt-tensilelite-remove-yappi-dependency.patch
|
||||
Source3: 0001-hipblaslt-tensilelite-use-system-paths.patch
|
||||
Source4: 0001-hipblaslt-find-origami-package.patch
|
||||
BuildSystem: cmake
|
||||
|
||||
BuildOption(conf): -DBLAS_INCLUDE_DIR=%{_includedir}/flexiblas
|
||||
BuildOption(conf): -DBUILD_CLIENTS_TESTS=%{cmake_test}
|
||||
BuildOption(conf): -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF
|
||||
BuildOption(conf): -DBUILD_VERBOSE=ON
|
||||
# CMake expects a compiler executable here, not a package name. gcc-fortran
# is the package pulled in through BuildRequires; the GCC Fortran driver it
# provides is called gfortran.
BuildOption(conf): -DCMAKE_Fortran_COMPILER=gfortran
|
||||
BuildOption(conf): -DCMAKE_VERBOSE_MAKEFILE=ON
|
||||
BuildOption(conf): -DGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
BuildOption(conf): -DTensile_COMPILER=clang++
|
||||
BuildOption(conf): -DTensile_LIBRARY_FORMAT=msgpack
|
||||
BuildOption(conf): -DTensile_VERBOSE=%{tensile_verbose}
|
||||
BuildOption(conf): -DVIRTUALENV_BIN_DIR=%{_bindir}
|
||||
BuildOption(conf): -Dnanobind_ROOT=%(python3 -m nanobind --cmake_dir)
|
||||
BuildOption(conf): -G Ninja
|
||||
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: cmake
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hipsparse)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(origami)
|
||||
BuildRequires: cmake(rocm_smi)
|
||||
BuildRequires: cmake(rocsparse)
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: gcc-fortran
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: ninja
|
||||
BuildRequires: pkgconfig(libzstd)
|
||||
BuildRequires: pkgconfig(msgpack)
|
||||
BuildRequires: pkgconfig(python3)
|
||||
BuildRequires: pkgconfig(zlib)
|
||||
BuildRequires: python3dist(joblib)
|
||||
BuildRequires: python3dist(msgpack)
|
||||
# nanobind is used to build the rocisa native module (build-time only)
|
||||
BuildRequires: python3dist(nanobind)
|
||||
BuildRequires: python3dist(pyyaml)
|
||||
BuildRequires: python3dist(setuptools)
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-device-libs
|
||||
BuildRequires: rocminfo
|
||||
BuildRequires: rocm-llvm-macros
|
||||
BuildRequires: roctracer-devel
|
||||
|
||||
# Extra build dependencies that are only needed when the client tests are
# enabled through the build_test conditional.
# NOTE(review): the prep section rewrites the client CMake files to link
# flexiblas and BLAS_INCLUDE_DIR points at the flexiblas headers, yet the BLAS
# dependency requested here is openblas - confirm which one is intended.
# NOTE(review): chrpath is not invoked in any section of this spec - confirm
# it is still required.
%if %{with build_test}
|
||||
BuildRequires: chrpath
|
||||
BuildRequires: pkgconfig(openblas)
|
||||
BuildRequires: pkgconfig(gtest)
|
||||
BuildRequires: pkgconfig(gmock)
|
||||
%endif
|
||||
|
||||
%description
|
||||
hipSPARSELt is a SPARSE marshaling library that provides general sparse
|
||||
matrix-matrix multiplication using structured sparsity. It offers a flexible
|
||||
API and supports multiple backends.
|
||||
|
||||
%package devel
|
||||
Summary: The hipSPARSELt development package
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
The hipSPARSELt development package.
|
||||
|
||||
%if %{with build_test}
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%prep
|
||||
# Unpack the hipSPARSELt tarball (Source0); its top-level directory is named
# after the package.
%autosetup -p1 -n %{name}
|
||||
|
||||
# Unpack the hipBLASLt tarball (Source1) inside the hipSPARSELt tree and apply
# the three patch files shipped as Source2..Source4 from within that directory.
# Everything up to the "cd .." below uses paths relative to hipblaslt/.
tar xf %{SOURCE1}
|
||||
cd hipblaslt
|
||||
|
||||
patch -p1 < %{SOURCE2}
|
||||
patch -p1 < %{SOURCE3}
|
||||
patch -p1 < %{SOURCE4}
|
||||
|
||||
# Use PATH to find where TensileGetPath and other tensile bins are
|
||||
sed -i -e 's@${Tensile_PREFIX}/bin/TensileGetPath@TensileGetPath@g' \
|
||||
tensilelite/Tensile/cmake/TensileConfig.cmake
|
||||
|
||||
# Make sure hip/hip_runtime.h is found
|
||||
sed -i -e 's@-x hip @-I%{_includedir} -x hip @' device-library/matrix-transform/CMakeLists.txt
|
||||
sed -i -e 's@"-D__HIP_HCC_COMPAT_MODE__=1"@"-D__HIP_HCC_COMPAT_MODE__=1","-I%{_includedir}"@' \
|
||||
tensilelite/Tensile/Toolchain/Component.py
|
||||
|
||||
# Use the distribution-provided nanobind instead of fetching/bundling it
|
||||
sed -i -e 's@FetchContent_MakeAvailable(nanobind)@find_package(nanobind CONFIG REQUIRED)@' \
|
||||
tensilelite/rocisa/CMakeLists.txt
|
||||
|
||||
# disable openmp in hipBLASLt
|
||||
sed -i -e 's@option(HIPBLASLT_ENABLE_OPENMP "Use OpenMP to improve performance." ON)@option(HIPBLASLT_ENABLE_OPENMP "Use OpenMP to improve performance." OFF)@' CMakeLists.txt
|
||||
|
||||
# Back to the hipSPARSELt top level; all paths below are relative to it,
# including the ones that reach into hipblaslt/.
cd ..
|
||||
|
||||
# Point hipBLASLt path at the bundled in-source copy (default looks in ../hipblaslt)
|
||||
sed -i -e 's@${CMAKE_CURRENT_SOURCE_DIR}/../hipblaslt@${CMAKE_CURRENT_SOURCE_DIR}/hipblaslt@' CMakeLists.txt
|
||||
|
||||
# Prevent the virtualenv install from cmake
|
||||
sed -i -e 's@virtualenv_install@#virtualenv_install@' CMakeLists.txt
|
||||
|
||||
# Unforce the setting of libdir
|
||||
sed -i -e 's@set(CMAKE_INSTALL_LIBDIR@#set(CMAKE_INSTALL_LIBDIR@' CMakeLists.txt
|
||||
|
||||
# Change looking for cblas to flexiblas
|
||||
sed -i -e 's@find_package( cblas REQUIRED CONFIG )@#find_package( cblas REQUIRED CONFIG )@' clients/CMakeLists.txt
|
||||
sed -i -e 's@set( BLAS_LIBRARY "blas" )@set( BLAS_LIBRARY "flexiblas" )@' clients/CMakeLists.txt
|
||||
sed -i -e 's@lapack cblas@flexiblas@' clients/gtest/CMakeLists.txt
|
||||
|
||||
# We are building from a tarball, not a git repo
|
||||
sed -i -e 's@find_package(Git REQUIRED)@#find_package(Git REQUIRED)@' hipblaslt/cmake/dependencies.cmake
|
||||
sed -i -e 's@find_package(Git REQUIRED)@#find_package(Git REQUIRED)@' cmake/Dependencies.cmake
|
||||
|
||||
# Replace all mentions of 'amdclang' with 'clang' in Tensile Python files
|
||||
find hipblaslt/tensilelite -type f -name "*.py" -exec sed -i 's/amdclang++/clang++/g; s/amdclang/clang/g' {} +
|
||||
|
||||
%build -p
|
||||
# This fragment is prepended (-p) to the build steps generated by the cmake
# build system, so the environment exported here is what those steps see.
# Do a manual install of tensilelite instead of cmake's virtualenv, then point
|
||||
# Tensile at it for build-time kernel generation (same approach as hipblaslt)
|
||||
cd hipblaslt/tensilelite
|
||||
# TL is both the tensilelite source directory and the install root, so the
# staged files land underneath the source directory itself.
TL=$PWD
|
||||
python3 setup.py install --root $TL
|
||||
cd ../..
|
||||
|
||||
# Ask hipconfig where the HIP clang lives and derive the toolchain locations
# (device bitcode directory, assembler, offload bundler) from that answer.
export PATH=%{_prefix}/bin:%{rocmllvm_bindir}:$PATH
|
||||
CLANG_PATH=`hipconfig --hipclangpath`
|
||||
ROCM_CLANG=${CLANG_PATH}/clang
|
||||
RESOURCE_DIR=`${ROCM_CLANG} -print-resource-dir`
|
||||
export DEVICE_LIB_PATH=${RESOURCE_DIR}/amdgcn/bitcode
|
||||
export TENSILE_ROCM_ASSEMBLER_PATH=${CLANG_PATH}/clang++
|
||||
export TENSILE_ROCM_OFFLOAD_BUNDLER_PATH=${CLANG_PATH}/clang-offload-bundler
|
||||
# Expose the scripts and Python modules staged under the TL root above.
export PATH=${TL}/%{_bindir}:$PATH
|
||||
export PYTHONPATH=${TL}%{python3_sitelib}:$PYTHONPATH
|
||||
export Tensile_DIR=${TL}%{python3_sitelib}/Tensile
|
||||
|
||||
%install -a
|
||||
# This fragment is appended (-a) after the generated install step.
# Remove the LICENSE.md copy placed under the doc directory; the files section
# lists LICENSE.md through the license directive instead.
rm -f %{buildroot}%{_datadir}/doc/hipsparselt/LICENSE.md
|
||||
|
||||
# Strip and fix permissions on hsaco kernel files
|
||||
%{rocmllvm_bindir}/llvm-strip %{buildroot}%{_libdir}/hipsparselt/library/Kernels*.hsaco
|
||||
chmod a+x %{buildroot}%{_libdir}/hipsparselt/library/Kernels*.hsaco
|
||||
|
||||
%files
|
||||
%doc README.md
|
||||
%license LICENSE.md
|
||||
%{_libdir}/libhipsparselt.so.*
|
||||
%{_libdir}/hipsparselt/
|
||||
|
||||
%files devel
|
||||
%{_includedir}/hipsparselt/
|
||||
%{_libdir}/cmake/hipsparselt/
|
||||
%{_libdir}/libhipsparselt.so
|
||||
|
||||
%if %{with build_test}
|
||||
%files test
|
||||
%{_bindir}/hipsparselt*
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
%autochangelog
|
||||
@@ -1,28 +0,0 @@
|
||||
From 19a83736d99acc42308f8003094c27b920d1c285 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Sun, 7 Jul 2024 05:07:39 -0600
|
||||
Subject: [PATCH] Prepare magma cmake for fedora
|
||||
|
||||
Need to version *.so'
|
||||
|
||||
Signed-off-by: Tom Rix <trix@redhat.com>
|
||||
---
|
||||
CMakeLists.txt | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 0c9b41c7d7fb..06e401e1199f 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -760,6 +760,8 @@ foreach( TEST ${sparse_testing_all} )
|
||||
endforeach()
|
||||
add_custom_target( sparse-testing DEPENDS ${sparse-testing} )
|
||||
|
||||
+SET_TARGET_PROPERTIES(magma PROPERTIES VERSION magma_VERSION SOVERSION magma_VERSION)
|
||||
+SET_TARGET_PROPERTIES(magma_sparse PROPERTIES VERSION magma_VERSION SOVERSION magma_VERSION)
|
||||
|
||||
# ----------------------------------------
|
||||
# what to install
|
||||
--
|
||||
2.45.1
|
||||
|
||||
@@ -1,754 +0,0 @@
|
||||
From 4afc1c3847ef926d7a29e18c6bf79cc4623805a1 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Fri, 3 Oct 2025 14:47:42 -0700
|
||||
Subject: [PATCH] magma ROCm 7 changes
|
||||
|
||||
From Jeff Daily's PR
|
||||
https://github.com/jeffdaily/magma/commit/1b966b72402e3f37ebd462f3d7e019e669e510ff
|
||||
|
||||
Signed-off-by: Tom Rix <Tom.Rix@amd.com>
|
||||
---
|
||||
include/magma_types.h | 5 ----
|
||||
interface_cuda/blas_h_v2.cpp | 16 +++++------
|
||||
magmablas/zgemm_batched.cpp | 4 +--
|
||||
magmablas/zgetf2_kernels.cu | 2 +-
|
||||
make.inc-examples/make.inc.hip-gcc-mkl | 6 ----
|
||||
make.inc-examples/make.inc.hip-gcc-openblas | 6 ----
|
||||
sparse/blas/magma_z_blaswrapper.cpp | 9 ------
|
||||
sparse/blas/magma_zmatrixtools_gpu.cu | 5 ----
|
||||
sparse/blas/magma_ztrisolve.cpp | 9 ------
|
||||
sparse/blas/zilu.cpp | 9 ------
|
||||
sparse/blas/zmergecg.cu | 10 -------
|
||||
sparse/control/magma_zmconvert.cpp | 5 ----
|
||||
sparse/control/magma_zmtranspose.cpp | 5 ----
|
||||
sparse/src/zcustomic.cpp | 5 ----
|
||||
sparse/src/zcustomilu.cpp | 5 ----
|
||||
sparse/src/zparict.cpp | 6 ----
|
||||
sparse/src/zparilut.cpp | 5 ----
|
||||
sparse/testing/testing_zspmm.cpp | 9 ------
|
||||
sparse/testing/testing_zspmv.cpp | 9 ------
|
||||
src/shpotrf_gpu.cpp | 8 +++---
|
||||
src/xhsgetrf_gpu.cpp | 32 ++++++++++-----------
|
||||
src/xshgetrf_gpu.cpp | 32 ++++++++++-----------
|
||||
testing/testing_sgemm_fp16.cpp | 16 +++++------
|
||||
testing/testing_zgemm_batched.cpp | 20 ++++++-------
|
||||
testing/testing_zgemv_batched.cpp | 20 ++++++-------
|
||||
testing/testing_zgeqrf_batched.cpp | 4 +--
|
||||
testing/testing_zgetrf_batched.cpp | 2 +-
|
||||
testing/testing_ztrsm_batched.cpp | 6 ++--
|
||||
tools/hipify-perl | 17 ++++-------
|
||||
tools/magmasubs.py | 3 +-
|
||||
30 files changed, 87 insertions(+), 203 deletions(-)
|
||||
|
||||
diff --git a/include/magma_types.h b/include/magma_types.h
|
||||
index 7f9c333c2643..2c03f5f8cc34 100644
|
||||
--- a/include/magma_types.h
|
||||
+++ b/include/magma_types.h
|
||||
@@ -227,8 +227,6 @@ typedef double real_Double_t;
|
||||
|
||||
/* double complex */
|
||||
|
||||
- //typedef hipblasDoubleComplex magmaDoubleComplex;
|
||||
-
|
||||
/* simple double complex definition that should be binary compatible with hipBLAS */
|
||||
typedef struct {
|
||||
|
||||
@@ -275,9 +273,6 @@ typedef double real_Double_t;
|
||||
|
||||
/* float complex */
|
||||
|
||||
- //typedef hipComplex magmaFloatComplex;
|
||||
- //typedef hipblasComplex magmaFloatComplex;
|
||||
-
|
||||
/* basic definition of float complex that should be binary compatible with hipBLAS */
|
||||
typedef struct {
|
||||
|
||||
diff --git a/interface_cuda/blas_h_v2.cpp b/interface_cuda/blas_h_v2.cpp
|
||||
index 2f61a1d6f5e2..56683fe294b0 100644
|
||||
--- a/interface_cuda/blas_h_v2.cpp
|
||||
+++ b/interface_cuda/blas_h_v2.cpp
|
||||
@@ -118,10 +118,10 @@ magma_hgemm(
|
||||
hipblas_trans_const( transA ),
|
||||
hipblas_trans_const( transB ),
|
||||
int(m), int(n), int(k),
|
||||
- (void*)&alpha, (void*)dA, HIPBLAS_R_16F, int(ldda),
|
||||
- (void*)dB, HIPBLAS_R_16F, int(lddb),
|
||||
- (void *)&beta, (void*)dC, HIPBLAS_R_16F, int(lddc),
|
||||
- HIPBLAS_R_16F,
|
||||
+ (void*)&alpha, (void*)dA, HIP_R_16F, int(ldda),
|
||||
+ (void*)dB, HIP_R_16F, int(lddb),
|
||||
+ (void *)&beta, (void*)dC, HIP_R_16F, int(lddc),
|
||||
+ HIPBLAS_COMPUTE_16F,
|
||||
HIPBLAS_GEMM_DEFAULT);
|
||||
}
|
||||
else {
|
||||
@@ -151,10 +151,10 @@ magma_hgemmx(
|
||||
hipblas_trans_const( transA ),
|
||||
hipblas_trans_const( transB ),
|
||||
int(m), int(n), int(k),
|
||||
- (void*)&alpha, (void*)dA, HIPBLAS_R_16F, int(ldda),
|
||||
- (void*)dB, HIPBLAS_R_16F, int(lddb),
|
||||
- (void*)&beta, (void*)dC, HIPBLAS_R_32F, int(lddc),
|
||||
- HIPBLAS_R_32F,
|
||||
+ (void*)&alpha, (void*)dA, HIP_R_16F, int(ldda),
|
||||
+ (void*)dB, HIP_R_16F, int(lddb),
|
||||
+ (void*)&beta, (void*)dC, HIP_R_32F, int(lddc),
|
||||
+ HIPBLAS_COMPUTE_32F,
|
||||
HIPBLAS_GEMM_DEFAULT);
|
||||
}
|
||||
else {
|
||||
diff --git a/magmablas/zgemm_batched.cpp b/magmablas/zgemm_batched.cpp
|
||||
index 2b4ea3ba1750..86a82971da0d 100644
|
||||
--- a/magmablas/zgemm_batched.cpp
|
||||
+++ b/magmablas/zgemm_batched.cpp
|
||||
@@ -28,13 +28,13 @@
|
||||
* */
|
||||
#ifdef PRECISION_z
|
||||
#ifdef MAGMA_HAVE_HIP
|
||||
- typedef hipblasDoubleComplex BackendFloat_t;
|
||||
+ typedef hipDoubleComplex BackendFloat_t;
|
||||
#else
|
||||
typedef cuDoubleComplex BackendFloat_t;
|
||||
#endif
|
||||
#elif defined(PRECISION_c)
|
||||
#ifdef MAGMA_HAVE_HIP
|
||||
- typedef hipblasComplex BackendFloat_t;
|
||||
+ typedef hipComplex BackendFloat_t;
|
||||
#else
|
||||
typedef cuFloatComplex BackendFloat_t;
|
||||
#endif
|
||||
diff --git a/magmablas/zgetf2_kernels.cu b/magmablas/zgetf2_kernels.cu
|
||||
index db8e7058fd11..3ff73755dda4 100644
|
||||
--- a/magmablas/zgetf2_kernels.cu
|
||||
+++ b/magmablas/zgetf2_kernels.cu
|
||||
@@ -211,7 +211,7 @@ magma_izamax_native(
|
||||
hipblasGetPointerMode(queue->hipblas_handle(), &ptr_mode);
|
||||
hipblasSetPointerMode(queue->hipblas_handle(), CUBLAS_POINTER_MODE_DEVICE);
|
||||
|
||||
- hipblasIzamax(queue->hipblas_handle(), length, (const hipblasDoubleComplex*)x, 1, (int*)(ipiv));
|
||||
+ hipblasIzamax(queue->hipblas_handle(), length, (const hipDoubleComplex*)x, 1, (int*)(ipiv));
|
||||
magma_zpivcast<<< 1, 1, 0, queue->cuda_stream() >>>( ipiv );
|
||||
|
||||
hipblasSetPointerMode(queue->hipblas_handle(), ptr_mode);
|
||||
diff --git a/make.inc-examples/make.inc.hip-gcc-mkl b/make.inc-examples/make.inc.hip-gcc-mkl
|
||||
index 4a7809e208f5..349835bbe2bd 100644
|
||||
--- a/make.inc-examples/make.inc.hip-gcc-mkl
|
||||
+++ b/make.inc-examples/make.inc.hip-gcc-mkl
|
||||
@@ -112,12 +112,6 @@ ifeq ($(BACKEND),cuda)
|
||||
DEVCCFLAGS += -Xcompiler "$(FPIC)" -Xcompiler "$(FOPENMP)" -std=c++11
|
||||
else ifeq ($(BACKEND),hip)
|
||||
DEVCCFLAGS += $(FPIC) $(FOPENMP) -std=c++11
|
||||
- # check for older versions of ROCM
|
||||
- ifeq ($(shell hipconfig --version | cut -b -3),3.0)
|
||||
- $(info Building with HIP 3.0)
|
||||
- # they don't have hipblasComplex yet, so replace it manually
|
||||
- DEVCCFLAGS += -DhipblasComplex=hipComplex -DhipblasDoubleComplex=hipDoubleComplex
|
||||
- endif
|
||||
endif
|
||||
|
||||
|
||||
diff --git a/make.inc-examples/make.inc.hip-gcc-openblas b/make.inc-examples/make.inc.hip-gcc-openblas
|
||||
index 2c6be03ea343..21e6d83396ce 100644
|
||||
--- a/make.inc-examples/make.inc.hip-gcc-openblas
|
||||
+++ b/make.inc-examples/make.inc.hip-gcc-openblas
|
||||
@@ -116,12 +116,6 @@ ifeq ($(BACKEND),cuda)
|
||||
DEVCCFLAGS += -Xcompiler "$(FPIC)" -Xcompiler "$(FOPENMP)" -std=c++11
|
||||
else ifeq ($(BACKEND),hip)
|
||||
DEVCCFLAGS += $(FPIC) $(FOPENMP) -std=c++11
|
||||
- # check for older versions of ROCM
|
||||
- ifeq ($(shell hipconfig --version | cut -b -3),3.0)
|
||||
- $(info Building with HIP 3.0)
|
||||
- # they don't have hipblasComplex yet, so replace it manually
|
||||
- DEVCCFLAGS += -DhipblasComplex=hipComplex -DhipblasDoubleComplex=hipDoubleComplex
|
||||
- endif
|
||||
endif
|
||||
|
||||
|
||||
diff --git a/sparse/blas/magma_z_blaswrapper.cpp b/sparse/blas/magma_z_blaswrapper.cpp
|
||||
index 1e3d3d887df7..e70618e3c812 100644
|
||||
--- a/sparse/blas/magma_z_blaswrapper.cpp
|
||||
+++ b/sparse/blas/magma_z_blaswrapper.cpp
|
||||
@@ -13,15 +13,6 @@
|
||||
|
||||
#define PRECISION_z
|
||||
|
||||
-/* For hipSPARSE, they use a separate complex type than for hipBLAS */
|
||||
-#if defined(MAGMA_HAVE_HIP)
|
||||
- #ifdef PRECISION_z
|
||||
- #define hipblasDoubleComplex hipDoubleComplex
|
||||
- #elif defined(PRECISION_c)
|
||||
- #define hipblasComplex hipComplex
|
||||
- #endif
|
||||
-#endif
|
||||
-
|
||||
#if CUDA_VERSION >= 12000
|
||||
#define CUSPARSE_CSRMV_ALG2 CUSPARSE_SPMV_CSR_ALG2
|
||||
#define CUSPARSE_CSRMV_ALG1 CUSPARSE_SPMV_CSR_ALG1
|
||||
diff --git a/sparse/blas/magma_zmatrixtools_gpu.cu b/sparse/blas/magma_zmatrixtools_gpu.cu
|
||||
index dcb8952d0e04..ba801300d50e 100644
|
||||
--- a/sparse/blas/magma_zmatrixtools_gpu.cu
|
||||
+++ b/sparse/blas/magma_zmatrixtools_gpu.cu
|
||||
@@ -14,11 +14,6 @@
|
||||
|
||||
#define SWAP(a, b) { tmp = a; a = b; b = tmp; }
|
||||
|
||||
-/* For hipSPARSE, they use a separate complex type than for hipBLAS */
|
||||
-#ifdef MAGMA_HAVE_HIP
|
||||
- #define hipblasDoubleComplex hipDoubleComplex
|
||||
-#endif
|
||||
-
|
||||
|
||||
|
||||
__global__ void
|
||||
diff --git a/sparse/blas/magma_ztrisolve.cpp b/sparse/blas/magma_ztrisolve.cpp
|
||||
index f51ee65b3462..29aa54b894f3 100644
|
||||
--- a/sparse/blas/magma_ztrisolve.cpp
|
||||
+++ b/sparse/blas/magma_ztrisolve.cpp
|
||||
@@ -13,15 +13,6 @@
|
||||
|
||||
#define PRECISION_z
|
||||
|
||||
-/* For hipSPARSE, they use a separate complex type than for hipBLAS */
|
||||
-#if defined(MAGMA_HAVE_HIP)
|
||||
- #ifdef PRECISION_z
|
||||
- #define hipblasDoubleComplex hipDoubleComplex
|
||||
- #elif defined(PRECISION_c)
|
||||
- #define hipblasComplex hipComplex
|
||||
- #endif
|
||||
-#endif
|
||||
-
|
||||
magma_int_t magma_ztrisolve_analysis(magma_z_matrix M, magma_solve_info_t *solve_info, bool upper_triangular, bool unit_diagonal, bool transpose, magma_queue_t queue)
|
||||
{
|
||||
magma_int_t info = 0;
|
||||
diff --git a/sparse/blas/zilu.cpp b/sparse/blas/zilu.cpp
|
||||
index c3f5709bb678..4150de259830 100644
|
||||
--- a/sparse/blas/zilu.cpp
|
||||
+++ b/sparse/blas/zilu.cpp
|
||||
@@ -16,15 +16,6 @@
|
||||
|
||||
#define PRECISION_z
|
||||
|
||||
-/* For hipSPARSE, they use a separate complex type than for hipBLAS */
|
||||
-#if defined(MAGMA_HAVE_HIP)
|
||||
- #ifdef PRECISION_z
|
||||
- #define hipblasDoubleComplex hipDoubleComplex
|
||||
- #elif defined(PRECISION_c)
|
||||
- #define hipblasComplex hipComplex
|
||||
- #endif
|
||||
-#endif
|
||||
-
|
||||
#if CUDA_VERSION >= 12000
|
||||
#define cusparseCreateCsrsm2Info(info)
|
||||
#define cusparseDestroyCsrsm2Info(info)
|
||||
diff --git a/sparse/blas/zmergecg.cu b/sparse/blas/zmergecg.cu
|
||||
index 7b472fa8591e..44d4b8720cd3 100644
|
||||
--- a/sparse/blas/zmergecg.cu
|
||||
+++ b/sparse/blas/zmergecg.cu
|
||||
@@ -13,16 +13,6 @@
|
||||
|
||||
#define PRECISION_z
|
||||
|
||||
-/* For hipSPARSE, they use a separate complex type than for hipBLAS */
|
||||
-#if defined(MAGMA_HAVE_HIP)
|
||||
- #ifdef PRECISION_z
|
||||
- #define hipblasDoubleComplex hipDoubleComplex
|
||||
- #elif defined(PRECISION_c)
|
||||
- #define hipblasComplex hipComplex
|
||||
- #endif
|
||||
-#endif
|
||||
-
|
||||
-
|
||||
#define BLOCK_SIZE 512
|
||||
|
||||
#if CUDA_VERSION >= 12000
|
||||
diff --git a/sparse/control/magma_zmconvert.cpp b/sparse/control/magma_zmconvert.cpp
|
||||
index 15e9a3d9a52c..83948bb3a198 100644
|
||||
--- a/sparse/control/magma_zmconvert.cpp
|
||||
+++ b/sparse/control/magma_zmconvert.cpp
|
||||
@@ -13,11 +13,6 @@
|
||||
#include <cuda.h> // for CUDA_VERSION
|
||||
|
||||
|
||||
-/* For hipSPARSE, they use a separate complex type than for hipBLAS */
|
||||
-#ifdef MAGMA_HAVE_HIP
|
||||
- #define hipblasDoubleComplex hipDoubleComplex
|
||||
-#endif
|
||||
-
|
||||
|
||||
// todo: check if we need buf later
|
||||
#if CUDA_VERSION >= 11000
|
||||
diff --git a/sparse/control/magma_zmtranspose.cpp b/sparse/control/magma_zmtranspose.cpp
|
||||
index 60b91f1d159b..9de0193495f5 100644
|
||||
--- a/sparse/control/magma_zmtranspose.cpp
|
||||
+++ b/sparse/control/magma_zmtranspose.cpp
|
||||
@@ -14,11 +14,6 @@
|
||||
|
||||
#include <cuda.h> // for CUDA_VERSION
|
||||
|
||||
-/* For hipSPARSE, they use a separate complex type than for hipBLAS */
|
||||
-#ifdef MAGMA_HAVE_HIP
|
||||
- #define hipblasDoubleComplex hipDoubleComplex
|
||||
-#endif
|
||||
-
|
||||
// todo: check if we need buf later
|
||||
#if CUDA_VERSION >= 11000
|
||||
#define cusparseZcsr2csc(handle, m, n, nnz, valA, rowA, colA, valB, colB, rowB, \
|
||||
diff --git a/sparse/src/zcustomic.cpp b/sparse/src/zcustomic.cpp
|
||||
index 1859cd29fe7c..5214769be4f4 100644
|
||||
--- a/sparse/src/zcustomic.cpp
|
||||
+++ b/sparse/src/zcustomic.cpp
|
||||
@@ -16,11 +16,6 @@
|
||||
|
||||
#define COMPLEX
|
||||
|
||||
-/* For hipSPARSE, they use a separate complex type than for hipBLAS */
|
||||
-#ifdef MAGMA_HAVE_HIP
|
||||
- #define hipblasDoubleComplex hipDoubleComplex
|
||||
-#endif
|
||||
-
|
||||
/**
|
||||
Purpose
|
||||
-------
|
||||
diff --git a/sparse/src/zcustomilu.cpp b/sparse/src/zcustomilu.cpp
|
||||
index c118125f3e37..2ceb8844a537 100644
|
||||
--- a/sparse/src/zcustomilu.cpp
|
||||
+++ b/sparse/src/zcustomilu.cpp
|
||||
@@ -16,11 +16,6 @@
|
||||
|
||||
#define COMPLEX
|
||||
|
||||
-/* For hipSPARSE, they use a separate complex type than for hipBLAS */
|
||||
-#ifdef MAGMA_HAVE_HIP
|
||||
- #define hipblasDoubleComplex hipDoubleComplex
|
||||
-#endif
|
||||
-
|
||||
/**
|
||||
Purpose
|
||||
-------
|
||||
diff --git a/sparse/src/zparict.cpp b/sparse/src/zparict.cpp
|
||||
index d1e72ea7683c..8337fd69c8b3 100644
|
||||
--- a/sparse/src/zparict.cpp
|
||||
+++ b/sparse/src/zparict.cpp
|
||||
@@ -21,12 +21,6 @@
|
||||
#define PRECISION_z
|
||||
|
||||
|
||||
-/* For hipSPARSE, they use a separate complex type than for hipBLAS */
|
||||
-#ifdef MAGMA_HAVE_HIP
|
||||
- #define hipblasDoubleComplex hipDoubleComplex
|
||||
-#endif
|
||||
-
|
||||
-
|
||||
/***************************************************************************//**
|
||||
Purpose
|
||||
-------
|
||||
diff --git a/sparse/src/zparilut.cpp b/sparse/src/zparilut.cpp
|
||||
index 55caecd13e3b..747e1620df63 100644
|
||||
--- a/sparse/src/zparilut.cpp
|
||||
+++ b/sparse/src/zparilut.cpp
|
||||
@@ -20,11 +20,6 @@
|
||||
|
||||
#define PRECISION_z
|
||||
|
||||
-/* For hipSPARSE, they use a separate complex type than for hipBLAS */
|
||||
-#ifdef MAGMA_HAVE_HIP
|
||||
- #define hipblasDoubleComplex hipDoubleComplex
|
||||
-#endif
|
||||
-
|
||||
/***************************************************************************//**
|
||||
Purpose
|
||||
-------
|
||||
diff --git a/sparse/testing/testing_zspmm.cpp b/sparse/testing/testing_zspmm.cpp
|
||||
index de21c66c8c57..2682af900f53 100644
|
||||
--- a/sparse/testing/testing_zspmm.cpp
|
||||
+++ b/sparse/testing/testing_zspmm.cpp
|
||||
@@ -70,15 +70,6 @@
|
||||
|
||||
#define PRECISION_z
|
||||
|
||||
-/* For hipSPARSE, they use a separate complex type than for hipBLAS */
|
||||
-#if defined(MAGMA_HAVE_HIP)
|
||||
- #ifdef PRECISION_z
|
||||
- #define hipblasDoubleComplex hipDoubleComplex
|
||||
-#elif defined(PRECISION_c)
|
||||
- #define hipblasComplex hipComplex
|
||||
- #endif
|
||||
-#endif
|
||||
-
|
||||
/* ////////////////////////////////////////////////////////////////////////////
|
||||
-- testing sparse matrix vector product
|
||||
*/
|
||||
diff --git a/sparse/testing/testing_zspmv.cpp b/sparse/testing/testing_zspmv.cpp
|
||||
index 2eff03f63f2a..776920a9cd4c 100644
|
||||
--- a/sparse/testing/testing_zspmv.cpp
|
||||
+++ b/sparse/testing/testing_zspmv.cpp
|
||||
@@ -72,15 +72,6 @@
|
||||
|
||||
#define PRECISION_z
|
||||
|
||||
-/* For hipSPARSE, they use a separate complex type than for hipBLAS */
|
||||
-#if defined(MAGMA_HAVE_HIP)
|
||||
- #ifdef PRECISION_z
|
||||
- #define hipblasDoubleComplex hipDoubleComplex
|
||||
-#elif defined(PRECISION_c)
|
||||
- #define hipblasComplex hipComplex
|
||||
- #endif
|
||||
-#endif
|
||||
-
|
||||
/* ////////////////////////////////////////////////////////////////////////////
|
||||
-- testing sparse matrix vector product
|
||||
*/
|
||||
diff --git a/src/shpotrf_gpu.cpp b/src/shpotrf_gpu.cpp
|
||||
index 6c57f2b57ab2..d981760d1057 100644
|
||||
--- a/src/shpotrf_gpu.cpp
|
||||
+++ b/src/shpotrf_gpu.cpp
|
||||
@@ -55,10 +55,10 @@ magma_sgemm_fp16(
|
||||
hipblasGemmEx( queue->hipblas_handle(),
|
||||
hipblas_trans_const( transA ), hipblas_trans_const( transB ),
|
||||
int(m), int(n), int(k),
|
||||
- (void*)&alpha, (void*)dhA, HIPBLAS_R_16F, (int)lddha,
|
||||
- (void*)dhB, HIPBLAS_R_16F, (int)lddhb,
|
||||
- (void*)&beta, (void*)dC, HIPBLAS_R_32F, (int)lddc,
|
||||
- HIPBLAS_R_32F, HIPBLAS_GEMM_DEFAULT);
|
||||
+ (void*)&alpha, (void*)dhA, HIP_R_16F, (int)lddha,
|
||||
+ (void*)dhB, HIP_R_16F, (int)lddhb,
|
||||
+ (void*)&beta, (void*)dC, HIP_R_32F, (int)lddc,
|
||||
+ HIPBLAS_COMPUTE_32F, HIPBLAS_GEMM_DEFAULT);
|
||||
return 0;
|
||||
#else
|
||||
return MAGMA_ERR_NOT_SUPPORTED;
|
||||
diff --git a/src/xhsgetrf_gpu.cpp b/src/xhsgetrf_gpu.cpp
|
||||
index 0a0a0c7aee04..7b1e9d61586a 100644
|
||||
--- a/src/xhsgetrf_gpu.cpp
|
||||
+++ b/src/xhsgetrf_gpu.cpp
|
||||
@@ -403,10 +403,10 @@ magma_xhsgetrf_gpu(
|
||||
hipblasGemmEx( queues[1]->hipblas_handle(),
|
||||
hipblas_trans_const( MagmaNoTrans ), hipblas_trans_const( MagmaNoTrans ),
|
||||
int(nextjb), int(m-nextj), int(jb),
|
||||
- &c_neg_one, dAT_hp(j, nextj), HIPBLAS_R_16F, int(lddat),
|
||||
- dAT_hp(nextj, j ), HIPBLAS_R_16F, int(lddat),
|
||||
- &c_one, dAT_hp(nextj, nextj), HIPBLAS_R_16F, int(lddat),
|
||||
- HIPBLAS_R_32F, ALGO);
|
||||
+ &c_neg_one, dAT_hp(j, nextj), HIP_R_16F, int(lddat),
|
||||
+ dAT_hp(nextj, j ), HIP_R_16F, int(lddat),
|
||||
+ &c_one, dAT_hp(nextj, nextj), HIP_R_16F, int(lddat),
|
||||
+ HIPBLAS_COMPUTE_32F, ALGO);
|
||||
#endif
|
||||
}
|
||||
else if( mp_algo_type == Magma_MP_GEMEX_I16_O16_C16 ) {
|
||||
@@ -422,10 +422,10 @@ magma_xhsgetrf_gpu(
|
||||
hipblasGemmEx( queues[1]->hipblas_handle(),
|
||||
hipblas_trans_const( MagmaNoTrans ), hipblas_trans_const( MagmaNoTrans ),
|
||||
int(nextjb), int(m-nextj), int(jb),
|
||||
- &h_neg_one, dAT_hp(j, nextj), HIPBLAS_R_16F, int(lddat),
|
||||
- dAT_hp(nextj, j ), HIPBLAS_R_16F, int(lddat),
|
||||
- &h_one, dAT_hp(nextj, nextj), HIPBLAS_R_16F, int(lddat),
|
||||
- HIPBLAS_R_16F, ALGO);
|
||||
+ &h_neg_one, dAT_hp(j, nextj), HIP_R_16F, int(lddat),
|
||||
+ dAT_hp(nextj, j ), HIP_R_16F, int(lddat),
|
||||
+ &h_one, dAT_hp(nextj, nextj), HIP_R_16F, int(lddat),
|
||||
+ HIPBLAS_COMPUTE_16F, ALGO);
|
||||
#endif
|
||||
}
|
||||
else if( mp_algo_type == Magma_MP_HGEMM ) {
|
||||
@@ -480,10 +480,10 @@ magma_xhsgetrf_gpu(
|
||||
hipblasGemmEx( queues[1]->hipblas_handle(),
|
||||
hipblas_trans_const( MagmaNoTrans ), hipblas_trans_const( MagmaNoTrans ),
|
||||
int(maxn-(nextj+nextjb)), int(m-nextj), int(jb),
|
||||
- &c_neg_one, dAT_hp(j, nextj+nextjb), HIPBLAS_R_16F, int(lddat),
|
||||
- dAT_hp(nextj, j ), HIPBLAS_R_16F, int(lddat),
|
||||
- &c_one, dAT_hp(nextj, nextj+nextjb), HIPBLAS_R_16F, int(lddat),
|
||||
- HIPBLAS_R_32F, ALGO);
|
||||
+ &c_neg_one, dAT_hp(j, nextj+nextjb), HIP_R_16F, int(lddat),
|
||||
+ dAT_hp(nextj, j ), HIP_R_16F, int(lddat),
|
||||
+ &c_one, dAT_hp(nextj, nextj+nextjb), HIP_R_16F, int(lddat),
|
||||
+ HIPBLAS_COMPUTE_32F, ALGO);
|
||||
#endif
|
||||
}
|
||||
else if( mp_algo_type == Magma_MP_GEMEX_I16_O16_C16 ) {
|
||||
@@ -499,10 +499,10 @@ magma_xhsgetrf_gpu(
|
||||
hipblasGemmEx( queues[1]->hipblas_handle(),
|
||||
hipblas_trans_const( MagmaNoTrans ), hipblas_trans_const( MagmaNoTrans ),
|
||||
int(maxn-(nextj+nextjb)), int(m-nextj), int(jb),
|
||||
- &h_neg_one, dAT_hp(j, nextj+nextjb), HIPBLAS_R_16F, int(lddat),
|
||||
- dAT_hp(nextj, j ), HIPBLAS_R_16F, int(lddat),
|
||||
- &h_one, dAT_hp(nextj, nextj+nextjb), HIPBLAS_R_16F, int(lddat),
|
||||
- HIPBLAS_R_16F, ALGO);
|
||||
+ &h_neg_one, dAT_hp(j, nextj+nextjb), HIP_R_16F, int(lddat),
|
||||
+ dAT_hp(nextj, j ), HIP_R_16F, int(lddat),
|
||||
+ &h_one, dAT_hp(nextj, nextj+nextjb), HIP_R_16F, int(lddat),
|
||||
+ HIPBLAS_COMPUTE_16F, ALGO);
|
||||
#endif
|
||||
}
|
||||
else if( mp_algo_type == Magma_MP_HGEMM ) {
|
||||
diff --git a/src/xshgetrf_gpu.cpp b/src/xshgetrf_gpu.cpp
|
||||
index 96ec1d97ffb8..345aefa1ad76 100644
|
||||
--- a/src/xshgetrf_gpu.cpp
|
||||
+++ b/src/xshgetrf_gpu.cpp
|
||||
@@ -270,10 +270,10 @@ magma_xshgetrf_gpu(
|
||||
hipblasGemmEx( queues[1]->hipblas_handle(),
|
||||
hipblas_trans_const( MagmaNoTrans ), hipblas_trans_const( MagmaNoTrans ),
|
||||
int(nextjb), int(m-nextj), int(jb),
|
||||
- &c_neg_one, dAtrsm1_hp, HIPBLAS_R_16F, int(maxnb),
|
||||
- dApanel_hp, HIPBLAS_R_16F, int(jb),
|
||||
- &c_one, dAT(nextj, nextj), HIPBLAS_R_32F, int(lddat),
|
||||
- HIPBLAS_R_32F, ALGO);
|
||||
+ &c_neg_one, dAtrsm1_hp, HIP_R_16F, int(maxnb),
|
||||
+ dApanel_hp, HIP_R_16F, int(jb),
|
||||
+ &c_one, dAT(nextj, nextj), HIP_R_32F, int(lddat),
|
||||
+ HIPBLAS_COMPUTE_32F, ALGO);
|
||||
#endif
|
||||
}
|
||||
else if( mp_algo_type == Magma_MP_GEMEX_I32_O32_C32 ) {
|
||||
@@ -289,10 +289,10 @@ magma_xshgetrf_gpu(
|
||||
hipblasGemmEx( queues[1]->hipblas_handle(),
|
||||
hipblas_trans_const( MagmaNoTrans ), hipblas_trans_const( MagmaNoTrans ),
|
||||
int(nextjb), int(m-nextj), int(jb),
|
||||
- &c_neg_one, dAT(j, nextj), HIPBLAS_R_32F, int(lddat),
|
||||
- dAT(nextj, j), HIPBLAS_R_32F, int(lddat),
|
||||
- &c_one, dAT(nextj, nextj), HIPBLAS_R_32F, int(lddat),
|
||||
- HIPBLAS_R_32F, ALGO);
|
||||
+ &c_neg_one, dAT(j, nextj), HIP_R_32F, int(lddat),
|
||||
+ dAT(nextj, j), HIP_R_32F, int(lddat),
|
||||
+ &c_one, dAT(nextj, nextj), HIP_R_32F, int(lddat),
|
||||
+ HIPBLAS_COMPUTE_32F, ALGO);
|
||||
#endif
|
||||
}
|
||||
else if( mp_algo_type == Magma_MP_SGEMM ) {
|
||||
@@ -343,10 +343,10 @@ magma_xshgetrf_gpu(
|
||||
hipblasGemmEx( queues[1]->hipblas_handle(),
|
||||
hipblas_trans_const( MagmaNoTrans ), hipblas_trans_const( MagmaNoTrans ),
|
||||
int(maxn-(nextj+nextjb)), int(m-nextj), int(jb),
|
||||
- &c_neg_one, dAtrsm2_hp , HIPBLAS_R_16F, int(maxm),
|
||||
- dApanel_hp , HIPBLAS_R_16F, int(jb),
|
||||
- &c_one, dAT(nextj, nextj+nextjb), HIPBLAS_R_32F, int(lddat),
|
||||
- HIPBLAS_R_32F, ALGO);
|
||||
+ &c_neg_one, dAtrsm2_hp , HIP_R_16F, int(maxm),
|
||||
+ dApanel_hp , HIP_R_16F, int(jb),
|
||||
+ &c_one, dAT(nextj, nextj+nextjb), HIP_R_32F, int(lddat),
|
||||
+ HIPBLAS_COMPUTE_32F, ALGO);
|
||||
#endif
|
||||
}
|
||||
else if( mp_algo_type == Magma_MP_GEMEX_I32_O32_C32 ) {
|
||||
@@ -362,10 +362,10 @@ magma_xshgetrf_gpu(
|
||||
hipblasGemmEx( queues[1]->hipblas_handle(),
|
||||
hipblas_trans_const( MagmaNoTrans ), hipblas_trans_const( MagmaNoTrans ),
|
||||
int(maxn-(nextj+nextjb)), int(m-nextj), int(jb),
|
||||
- &c_neg_one, dAT(j , nextj+nextjb), HIPBLAS_R_32F, int(lddat),
|
||||
- dAT(nextj, j ), HIPBLAS_R_32F, int(lddat),
|
||||
- &c_one, dAT(nextj, nextj+nextjb), HIPBLAS_R_32F, int(lddat),
|
||||
- HIPBLAS_R_32F, ALGO);
|
||||
+ &c_neg_one, dAT(j , nextj+nextjb), HIP_R_32F, int(lddat),
|
||||
+ dAT(nextj, j ), HIP_R_32F, int(lddat),
|
||||
+ &c_one, dAT(nextj, nextj+nextjb), HIP_R_32F, int(lddat),
|
||||
+ HIPBLAS_COMPUTE_32F, ALGO);
|
||||
#endif
|
||||
}
|
||||
else if( mp_algo_type == Magma_MP_SGEMM ) {
|
||||
diff --git a/testing/testing_sgemm_fp16.cpp b/testing/testing_sgemm_fp16.cpp
|
||||
index 84f24b95a4a4..0fbfc0b338fe 100644
|
||||
--- a/testing/testing_sgemm_fp16.cpp
|
||||
+++ b/testing/testing_sgemm_fp16.cpp
|
||||
@@ -59,10 +59,10 @@ magma_sgemm_fp16_v1(
|
||||
hipblasGemmEx( magma_queue_get_hipblas_handle( queue ),
|
||||
hipblas_trans_const( transA ), hipblas_trans_const( transB ),
|
||||
int(m), int(n), int(k),
|
||||
- (void*)&alpha, (void*)dhA, HIPBLAS_R_16F, (int)ldda,
|
||||
- (void*)dhB, HIPBLAS_R_16F, (int)lddb,
|
||||
- (void*)&beta, (void*)dC, HIPBLAS_R_32F, (int)lddc,
|
||||
- HIPBLAS_R_32F, HIPBLAS_GEMM_DEFAULT);
|
||||
+ (void*)&alpha, (void*)dhA, HIP_R_16F, (int)ldda,
|
||||
+ (void*)dhB, HIP_R_16F, (int)lddb,
|
||||
+ (void*)&beta, (void*)dC, HIP_R_32F, (int)lddc,
|
||||
+ HIPBLAS_COMPUTE_32F, HIPBLAS_GEMM_DEFAULT);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
@@ -97,10 +97,10 @@ magma_sgemm_fp16_v2(
|
||||
hipblasGemmEx( magma_queue_get_hipblas_handle( queue ),
|
||||
hipblas_trans_const( transA ), hipblas_trans_const( transB ),
|
||||
int(m), int(n), int(k),
|
||||
- (void*)&alpha, (void*)dhA, HIPBLAS_R_16F, (int)ldda,
|
||||
- (void*)dhB, HIPBLAS_R_16F, (int)lddb,
|
||||
- (void*)&beta, (void*)dC, HIPBLAS_R_32F, (int)lddc,
|
||||
- HIPBLAS_R_32F, HIPBLAS_GEMM_DEFAULT);
|
||||
+ (void*)&alpha, (void*)dhA, HIP_R_16F, (int)ldda,
|
||||
+ (void*)dhB, HIP_R_16F, (int)lddb,
|
||||
+ (void*)&beta, (void*)dC, HIP_R_32F, (int)lddc,
|
||||
+ HIPBLAS_COMPUTE_32F, HIPBLAS_GEMM_DEFAULT);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
diff --git a/testing/testing_zgemm_batched.cpp b/testing/testing_zgemm_batched.cpp
|
||||
index 0b300d1081be..350d0049865d 100644
|
||||
--- a/testing/testing_zgemm_batched.cpp
|
||||
+++ b/testing/testing_zgemm_batched.cpp
|
||||
@@ -190,11 +190,11 @@ int main( int argc, char** argv)
|
||||
hipblasZgemmBatched(
|
||||
opts.handle, cublas_trans_const(opts.transA), cublas_trans_const(opts.transB),
|
||||
int(M), int(N), int(K),
|
||||
- (const hipblasDoubleComplex*)&alpha,
|
||||
- (const hipblasDoubleComplex**) d_A_array, int(ldda),
|
||||
- (const hipblasDoubleComplex**) d_B_array, int(lddb),
|
||||
- (const hipblasDoubleComplex*)&beta,
|
||||
- (hipblasDoubleComplex**)d_C_array, int(lddc), int(batchCount) );
|
||||
+ (const hipDoubleComplex*)&alpha,
|
||||
+ (const hipDoubleComplex**) d_A_array, int(ldda),
|
||||
+ (const hipDoubleComplex**) d_B_array, int(lddb),
|
||||
+ (const hipDoubleComplex*)&beta,
|
||||
+ (hipDoubleComplex**)d_C_array, int(lddc), int(batchCount) );
|
||||
#endif
|
||||
}
|
||||
else{
|
||||
@@ -211,11 +211,11 @@ int main( int argc, char** argv)
|
||||
hipblasZgemmStridedBatched(
|
||||
opts.handle, cublas_trans_const(opts.transA), cublas_trans_const(opts.transB),
|
||||
int(M), int(N), int(K),
|
||||
- (const hipblasDoubleComplex*)&alpha,
|
||||
- (const hipblasDoubleComplex*) d_A, int(ldda), ldda * An,
|
||||
- (const hipblasDoubleComplex*) d_B, int(lddb), lddb * Bn,
|
||||
- (const hipblasDoubleComplex*)&beta,
|
||||
- (hipblasDoubleComplex*)d_C, int(lddc), lddc*N, int(batchCount) );
|
||||
+ (const hipDoubleComplex*)&alpha,
|
||||
+ (const hipDoubleComplex*) d_A, int(ldda), ldda * An,
|
||||
+ (const hipDoubleComplex*) d_B, int(lddb), lddb * Bn,
|
||||
+ (const hipDoubleComplex*)&beta,
|
||||
+ (hipDoubleComplex*)d_C, int(lddc), lddc*N, int(batchCount) );
|
||||
#endif
|
||||
}
|
||||
|
||||
diff --git a/testing/testing_zgemv_batched.cpp b/testing/testing_zgemv_batched.cpp
|
||||
index 9c63a405be17..be1dea4cfa6b 100644
|
||||
--- a/testing/testing_zgemv_batched.cpp
|
||||
+++ b/testing/testing_zgemv_batched.cpp
|
||||
@@ -182,11 +182,11 @@ int main( int argc, char** argv)
|
||||
#else
|
||||
hipblasZgemvBatched(opts.handle, hipblas_trans_const(opts.transA),
|
||||
M, N,
|
||||
- (const hipblasDoubleComplex *)&alpha,
|
||||
- (const hipblasDoubleComplex **)d_A_array, ldda,
|
||||
- (const hipblasDoubleComplex **)d_X_array, incx,
|
||||
- (const hipblasDoubleComplex *)&beta,
|
||||
- (hipblasDoubleComplex **)d_Y_array, incy, batchCount);
|
||||
+ (const hipDoubleComplex *)&alpha,
|
||||
+ (const hipDoubleComplex **)d_A_array, ldda,
|
||||
+ (const hipDoubleComplex **)d_X_array, incx,
|
||||
+ (const hipDoubleComplex *)&beta,
|
||||
+ (hipDoubleComplex **)d_Y_array, incy, batchCount);
|
||||
#endif
|
||||
}
|
||||
else{
|
||||
@@ -210,11 +210,11 @@ int main( int argc, char** argv)
|
||||
#else
|
||||
hipblasZgemvStridedBatched(opts.handle, hipblas_trans_const(opts.transA),
|
||||
M, N,
|
||||
- (const hipblasDoubleComplex *)&alpha,
|
||||
- (const hipblasDoubleComplex *)d_A, ldda, ldda*N,
|
||||
- (const hipblasDoubleComplex *)d_X, incx, incx*Xm,
|
||||
- (const hipblasDoubleComplex *)&beta,
|
||||
- (hipblasDoubleComplex *)d_Y, incy, incy*Ym, batchCount);
|
||||
+ (const hipDoubleComplex *)&alpha,
|
||||
+ (const hipDoubleComplex *)d_A, ldda, ldda*N,
|
||||
+ (const hipDoubleComplex *)d_X, incx, incx*Xm,
|
||||
+ (const hipDoubleComplex *)&beta,
|
||||
+ (hipDoubleComplex *)d_Y, incy, incy*Ym, batchCount);
|
||||
#endif
|
||||
}
|
||||
device_time = magma_sync_wtime( opts.queue ) - device_time;
|
||||
diff --git a/testing/testing_zgeqrf_batched.cpp b/testing/testing_zgeqrf_batched.cpp
|
||||
index 6cc62ab63055..acdb92754f44 100644
|
||||
--- a/testing/testing_zgeqrf_batched.cpp
|
||||
+++ b/testing/testing_zgeqrf_batched.cpp
|
||||
@@ -195,8 +195,8 @@ int main( int argc, char** argv)
|
||||
#endif
|
||||
#else
|
||||
hipblasZgeqrfBatched( opts.handle, int(M), int(N),
|
||||
- (hipblasDoubleComplex**)dA_array, int(ldda),
|
||||
- (hipblasDoubleComplex**)dtau_array,
|
||||
+ (hipDoubleComplex**)dA_array, int(ldda),
|
||||
+ (hipDoubleComplex**)dtau_array,
|
||||
&device_info, int(batchCount) );
|
||||
#endif
|
||||
|
||||
diff --git a/testing/testing_zgetrf_batched.cpp b/testing/testing_zgetrf_batched.cpp
|
||||
index ea9871364901..d23de5d934bd 100644
|
||||
--- a/testing/testing_zgetrf_batched.cpp
|
||||
+++ b/testing/testing_zgetrf_batched.cpp
|
||||
@@ -238,7 +238,7 @@ int main( int argc, char** argv)
|
||||
dinfo_device, int(batchCount) );
|
||||
#else
|
||||
hipblasZgetrfBatched( opts.handle, int(N),
|
||||
- (hipblasDoubleComplex**)dA_array, int(ldda), dipiv_device,
|
||||
+ (hipDoubleComplex**)dA_array, int(ldda), dipiv_device,
|
||||
dinfo_device, int(batchCount) );
|
||||
#endif
|
||||
}
|
||||
diff --git a/testing/testing_ztrsm_batched.cpp b/testing/testing_ztrsm_batched.cpp
|
||||
index c2b6d0d88660..7a9f7c449dda 100644
|
||||
--- a/testing/testing_ztrsm_batched.cpp
|
||||
+++ b/testing/testing_ztrsm_batched.cpp
|
||||
@@ -219,9 +219,9 @@ int main( int argc, char** argv)
|
||||
hipblasZtrsmBatched(
|
||||
opts.handle, cublas_side_const(opts.side), cublas_uplo_const(opts.uplo),
|
||||
cublas_trans_const(opts.transA), cublas_diag_const(opts.diag),
|
||||
- int(M), int(N), (const hipblasDoubleComplex*)&alpha,
|
||||
- (hipblasDoubleComplex* const*) d_A_array, int(ldda),
|
||||
- ( hipblasDoubleComplex**) d_B_array, int(lddb), int(batchCount) );
|
||||
+ int(M), int(N), (const hipDoubleComplex*)&alpha,
|
||||
+ (hipDoubleComplex* const*) d_A_array, int(ldda),
|
||||
+ ( hipDoubleComplex**) d_B_array, int(lddb), int(batchCount) );
|
||||
#endif
|
||||
|
||||
cublas_time = magma_sync_wtime( opts.queue ) - cublas_time;
|
||||
diff --git a/tools/hipify-perl b/tools/hipify-perl
|
||||
index 1a7f889eb1eb..aa6a06ec5592 100755
|
||||
--- a/tools/hipify-perl
|
||||
+++ b/tools/hipify-perl
|
||||
@@ -986,15 +986,8 @@ sub simpleSubstitutions {
|
||||
$ft{'type'} += s/\bcsrilu02Info_t\b/csrilu02Info_t/g;
|
||||
$ft{'type'} += s/\bcsrsv2Info_t\b/csrsv2Info_t/g;
|
||||
$ft{'type'} += s/\bcuComplex\b/hipComplex/g;
|
||||
-
|
||||
- # original type subs
|
||||
- #$ft{'type'} += s/\bcuDoubleComplex\b/hipDoubleComplex/g;
|
||||
- #$ft{'type'} += s/\bcuFloatComplex\b/hipFloatComplex/g;
|
||||
-
|
||||
- # replace them with hipBLAS types
|
||||
- $ft{'type'} += s/\bcuDoubleComplex\b/hipblasDoubleComplex/g;
|
||||
- $ft{'type'} += s/\bcuFloatComplex\b/hipblasFloatComplex/g;
|
||||
-
|
||||
+ $ft{'type'} += s/\bcuDoubleComplex\b/hipDoubleComplex/g;
|
||||
+ $ft{'type'} += s/\bcuFloatComplex\b/hipFloatComplex/g;
|
||||
$ft{'type'} += s/\bcublasDataType_t\b/hipblasDatatype_t/g;
|
||||
$ft{'type'} += s/\bcublasDiagType_t\b/hipblasDiagType_t/g;
|
||||
$ft{'type'} += s/\bcublasFillMode_t\b/hipblasFillMode_t/g;
|
||||
@@ -1210,9 +1203,9 @@ sub simpleSubstitutions {
|
||||
$ft{'numeric_literal'} += s/\bCUDA_ERROR_UNKNOWN\b/hipErrorUnknown/g;
|
||||
$ft{'numeric_literal'} += s/\bCUDA_ERROR_UNMAP_FAILED\b/hipErrorUnmapFailed/g;
|
||||
$ft{'numeric_literal'} += s/\bCUDA_ERROR_UNSUPPORTED_LIMIT\b/hipErrorUnsupportedLimit/g;
|
||||
- $ft{'numeric_literal'} += s/\bCUDA_R_16F\b/HIPBLAS_R_16F/g;
|
||||
- $ft{'numeric_literal'} += s/\bCUDA_R_32F\b/HIPBLAS_R_32F/g;
|
||||
- $ft{'numeric_literal'} += s/\bCUDA_R_64F\b/HIPBLAS_R_64F/g;
|
||||
+ $ft{'numeric_literal'} += s/\bCUDA_R_16F\b/HIP_R_16F/g;
|
||||
+ $ft{'numeric_literal'} += s/\bCUDA_R_32F\b/HIP_R_32F/g;
|
||||
+ $ft{'numeric_literal'} += s/\bCUDA_R_64F\b/HIP_R_64F/g;
|
||||
$ft{'numeric_literal'} += s/\bCUDA_SUCCESS\b/hipSuccess/g;
|
||||
$ft{'numeric_literal'} += s/\bCUDNN_16BIT_INDICES\b/HIPDNN_16BIT_INDICES/g;
|
||||
$ft{'numeric_literal'} += s/\bCUDNN_32BIT_INDICES\b/HIPDNN_32BIT_INDICES/g;
|
||||
diff --git a/tools/magmasubs.py b/tools/magmasubs.py
|
||||
index f1e0dabca776..00b8aed30295 100644
|
||||
--- a/tools/magmasubs.py
|
||||
+++ b/tools/magmasubs.py
|
||||
@@ -501,8 +501,7 @@ subs = {
|
||||
('float', 'double', 'cuFloatComplex', 'cuDoubleComplex' ),
|
||||
('float', 'double', 'hipFloatComplex', 'hipDoubleComplex' ),
|
||||
('CUDA_R_32F', 'CUDA_R_64F', 'CUDA_C_32F', 'CUDA_C_64F' ),
|
||||
- #('float', 'double', 'hipComplex', 'hipDoubleComplex' ),
|
||||
- ('float', 'double', 'hipblasComplex', 'hipblasDoubleComplex'),
|
||||
+ ('float', 'double', 'hipComplex', 'hipDoubleComplex' ),
|
||||
('float', 'double', 'MKL_Complex8', 'MKL_Complex16' ),
|
||||
('magmaFloat_const_ptr', 'magmaDouble_const_ptr','magmaFloatComplex_const_ptr', 'magmaDoubleComplex_const_ptr'), # before magmaDoubleComplex
|
||||
('magmaFloat_const_ptr', 'magmaDouble_const_ptr','magmaFloat_const_ptr', 'magmaDouble_const_ptr' ), # before magmaDoubleComplex
|
||||
--
|
||||
2.51.0
|
||||
|
||||
+37
-151
@@ -1,100 +1,50 @@
|
||||
%bcond gitcommit 1
|
||||
%if %{with gitcommit}
|
||||
%global commit0 4e4e95f2e5d52a7237cd04a1aec4501463156396
|
||||
%global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
|
||||
%global date0 20260210
|
||||
%endif
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
# rocm toolchain uses the hipcc wrapper of clang
|
||||
%global toolchain rocm
|
||||
# hipcc does not support some clang flags
|
||||
%global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host -fcf-protection/' -e 's/-mtls-dialect=gnu2//' -e 's/-flto=thin//' )
|
||||
# lto problems
|
||||
# ld.lld: error: CMakeFiles/magma.dir/control/libmagma.so.2.9.0.lto.constants.cpp.o:(.rodata.magma_diag_const+0x4): relocation R_X86_64_PC32 out of range: 5412144436 is not in [-2147483648, 2147483647]; r
|
||||
%global _lto_cflags %nil
|
||||
%global toolchain clang
|
||||
|
||||
# Need to have a GPU in the build machine to test.
|
||||
# To speed up testing, only gfx1201 is built
|
||||
%bcond test 0
|
||||
%if %{with test}
|
||||
%global gpu_list gfx1201
|
||||
%else
|
||||
%global gpu_list %{rocm_gpu_list_default}
|
||||
%endif
|
||||
|
||||
%bcond debug 0
|
||||
%if %{with debug}
|
||||
%global build_type DEBUG
|
||||
%else
|
||||
%global build_type RelWithDebInfo
|
||||
%endif
|
||||
|
||||
Name: magma
|
||||
Version: 2.9.0
|
||||
Version: 2.10.0
|
||||
Release: %autorelease
|
||||
Summary: Matrix Algebra on GPU and Multi-core Architectures
|
||||
License: BSD-3-Clause
|
||||
Url: https://icl.utk.edu/magma/
|
||||
License: BSD-3-Clause AND MIT
|
||||
# From the license check
|
||||
# The main license is BSD-3-Clause
|
||||
# COPYRIGHT
|
||||
# Then ISC, with a file copied from OpenBSD
|
||||
# control/strlcpy.cpp
|
||||
# Then MIT, with a file copied from hipify
|
||||
# hipify is used but not delivered
|
||||
# tools/hipify-perl
|
||||
# Reported GPL but not used, other similar files for cuda
|
||||
# results/v1.5.0/cuda7.0-k40c/setup.txt
|
||||
|
||||
# Pick up a newer commit to include pull request #27
|
||||
# https://github.com/icl-utk-edu/magma/pull/27
|
||||
%if %{with gitcommit}
|
||||
Source0: https://github.com/icl-utk-edu/%{name}/archive/%{commit0}.tar.gz
|
||||
%else
|
||||
VCS: git:https://github.com/icl-utk-edu/magma.git
|
||||
#!RemoteAsset: sha256:26347adbccbe7a6693d6b3f3c0ab5620037eb3a62b5ef69d05e40289472a82a4
|
||||
Source0: https://github.com/icl-utk-edu/%{name}/archive/v%{version}.tar.gz
|
||||
%endif
|
||||
|
||||
# For versioning the *.so's
|
||||
# https://bitbucket.org/icl/magma/issues/77/versioning-so
|
||||
Patch0: 0001-Prepare-magma-cmake-for-fedora.patch
|
||||
BuildOption(conf): -G Ninja
|
||||
BuildOption(conf): -DBLA_VENDOR=OpenBLAS
|
||||
BuildOption(conf): -DAMDGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
BuildOption(conf): -DMAGMA_ENABLE_HIP=ON
|
||||
BuildOption(conf): -DUSE_FORTRAN=OFF
|
||||
|
||||
# https://github.com/jeffdaily/magma/commit/1b966b72402e3f37ebd462f3d7e019e669e510ff
|
||||
Patch1: 0001-magma-ROCm-7-changes.patch
|
||||
|
||||
BuildRequires: llvm
|
||||
BuildRequires: llvm-devel
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-devel
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: clang-tools-extra-devel
|
||||
BuildRequires: lld
|
||||
BuildRequires: lld-devel
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: rocm-device-libs
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hipblas)
|
||||
BuildRequires: cmake(hipsparse)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(openblas)
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: openblas-devel
|
||||
BuildRequires: hipblas-devel
|
||||
BuildRequires: hipsparse-devel
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: ninja
|
||||
BuildRequires: python3
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-device-libs
|
||||
BuildRequires: rocm-llvm-macros
|
||||
BuildRequires: rocm-comgr-devel
|
||||
# BuildRequires: rocm-core-devel
|
||||
BuildRequires: rocm-hip-devel
|
||||
BuildRequires: rocr-runtime-devel
|
||||
#BuildRequires: rocm-rpm-macros
|
||||
|
||||
# MIT
|
||||
# Just the hipify-perl file is taken and it is very old
|
||||
# This is older than any release of https://github.com/ROCm/HIPIFY.git
|
||||
# So setting to earliest release -1
|
||||
Provides: bundled(hipify) = 3.4.0
|
||||
|
||||
ExclusiveArch: x86_64 riscv64
|
||||
|
||||
%description
|
||||
Matrix Algebra on GPU and Multi-core Architectures (MAGMA) is a collection
|
||||
@@ -117,29 +67,22 @@ including functionalities for machine learning applications that use MAGMA
|
||||
as their computational back end. The MAGMA Sparse and MAGMA Batched
|
||||
packages have been included since MAGMA 1.6.
|
||||
|
||||
%package devel
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
%description devel
|
||||
%{summary}
|
||||
|
||||
%prep
|
||||
%if %{with gitcommit}
|
||||
%setup -q -n %{name}-%{commit0}
|
||||
%else
|
||||
%autosetup -p1
|
||||
%endif
|
||||
%prep -a
|
||||
# Add newer gfx targets to Makefile's valid arch whitelist
|
||||
# https://bitbucket.org/icl/magma/issues/76/a-few-new-rocm-gpus
|
||||
sed -i -e 's@1032 1033@1032 1033 1100 1101 1102 1103 1150 1151 1152 1153 1200 1201@' Makefile
|
||||
|
||||
%if %{with test}
|
||||
# Just the test gpu gfx1201
|
||||
sed -i -e 's@1032 1033@1201@' Makefile
|
||||
# Remove some tests,
|
||||
# Remove a test that fails to link (undefined magma_generate_matrix)
|
||||
sed -i -e '/testing_zgenerate.cpp/d' testing/Makefile.src
|
||||
%else
|
||||
# Add some more gfx's
|
||||
# https://bitbucket.org/icl/magma/issues/76/a-few-new-rocm-gpus
|
||||
sed -i -e 's@1032 1033@950 1032 1033 1100 1101 1102 1103 1150 1151 1152 1153 1200 1201@' Makefile
|
||||
# Disable building tests
|
||||
sed -i -e 's@include_directories( testing )@#include_directories( testing )@' CMakeLists.txt
|
||||
sed -i -e 's@foreach( filename ${testing_all} )@foreach( filename ${no_testing_all} )@' CMakeLists.txt
|
||||
@@ -152,9 +95,6 @@ sed -i -e 's@add_custom_target( sparse-testing DEPENDS ${sparse-testing} )@#add_
|
||||
sed -i -e 's@DESTINATION lib@DESTINATION ${CMAKE_INSTALL_LIBDIR}@' CMakeLists.txt
|
||||
sed -i -e 's@DESTINATION bin@DESTINATION ${CMAKE_INSTALL_BINDIR}@' CMakeLists.txt
|
||||
|
||||
# Version *.so
|
||||
sed -i -e 's@magma_VERSION@"%{version}"@g' CMakeLists.txt
|
||||
|
||||
# python to python3, need env to find local bits like magmasubs.py
|
||||
sed -i -e 's@env python@env python3@' tools/checklist_run_tests.py
|
||||
sed -i -e 's@env python@env python3@' tools/check-style.py
|
||||
@@ -169,67 +109,14 @@ sed -i -e '/strlcpy/d' include/magma_auxiliary.h
|
||||
sed -i -e 's@magma_strlcpy@strlcpy@' control/trace.cpp
|
||||
rm control/strlcpy.cpp
|
||||
|
||||
# Policy CMP0037 may not be set to OLD behavior because this version of CMake
|
||||
sed -i -e 's@cmake_policy( SET CMP0037 OLD)@#cmake_policy( SET CMP0037 OLD)@' CMakeLists.txt
|
||||
|
||||
# Add offload-compress compile flags
|
||||
sed -i -e 's@-DROCM_VERSION@--offload-compress -DROCM_VERSION@' CMakeLists.txt
|
||||
|
||||
%build
|
||||
|
||||
#export HIP_PATH=`hipconfig -p`
|
||||
#export ROCM_PATH=`hipconfig -R`
|
||||
|
||||
%build -p
|
||||
echo "BACKEND = hip" > make.inc
|
||||
echo "FORT = false" >> make.inc
|
||||
%if %{with test}
|
||||
echo "GPU_TARGET = gfx1201" >> make.inc
|
||||
%else
|
||||
echo "GPU_TARGET = gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx950;gfx1010;gfx1012;gfx1030;gfx1031;gfx1035;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151;gfx1152;gfx1153;gfx1200;gfx1201" >> make.inc
|
||||
%endif
|
||||
echo "GPU_TARGET = gfx1100;gfx1200;gfx1201" >> make.inc
|
||||
|
||||
make generate
|
||||
|
||||
%cmake -G Ninja \
|
||||
-DBLA_VENDOR=OpenBLAS \
|
||||
-DCMAKE_BUILD_TYPE=%{build_type} \
|
||||
-DCMAKE_C_COMPILER=%rocmllvm_bindir/clang \
|
||||
-DCMAKE_CXX_COMPILER=%rocmllvm_bindir/clang++ \
|
||||
-DAMDGPU_TARGETS=%{gpu_list} \
|
||||
-DCMAKE_INSTALL_LIBDIR=%_libdir \
|
||||
-DMAGMA_ENABLE_HIP=ON \
|
||||
-DUSE_FORTRAN=OFF
|
||||
|
||||
%cmake_build
|
||||
|
||||
%install
|
||||
%cmake_install
|
||||
|
||||
%if %{with test}
|
||||
%check
|
||||
# Results should be something like
|
||||
# % MAGMA 2.9.0 svn 32-bit magma_int_t, 64-bit pointer.
|
||||
# % HIP runtime 70051831, driver 70051831. MAGMA not compiled with OpenMP.
|
||||
# % device 0: AMD Radeon Graphics, 2420.0 MHz clock, 16304.0 MiB memory, gcn arch gfx1201
|
||||
# % Sat Oct 4 08:05:46 2025
|
||||
# % Usage: redhat-linux-build/testing/testing_sgemm [options] [-h|--help]
|
||||
#
|
||||
# % If running lapack (option --lapack), MAGMA and HIPBLAS error are both computed
|
||||
# % relative to CPU BLAS result. Else, MAGMA error is computed relative to HIPBLAS result.
|
||||
#
|
||||
# % transA = No transpose, transB = No transpose
|
||||
# % M N K MAGMA Gflop/s (ms) HIPBLAS Gflop/s (ms) CPU Gflop/s (ms) MAGMA error HIPBLAS error
|
||||
# %========================================================================================================
|
||||
# 1088 1088 1088 1778.70 ( 1.45) 146.15 ( 17.63) --- ( --- ) 1.25e-08 --- ok
|
||||
# 2112 2112 2112 10940.92 ( 1.72) 10784.16 ( 1.75) --- ( --- ) 1.19e-08 --- ok
|
||||
# 3136 3136 3136 10919.38 ( 5.65) 11215.23 ( 5.50) --- ( --- ) 1.34e-08 --- ok
|
||||
# 4160 4160 4160 11144.04 ( 12.92) 12124.94 ( 11.87) --- ( --- ) 1.13e-08 --- ok
|
||||
# 5184 5184 5184 12999.31 ( 21.43) 14869.66 ( 18.74) --- ( --- ) 1.29e-08 --- ok
|
||||
# 6208 6208 6208 13411.76 ( 35.68) 14849.32 ( 32.22) --- ( --- ) 1.07e-08 --- ok
|
||||
# 7232 7232 7232 13335.70 ( 56.73) 14755.09 ( 51.27) --- ( --- ) 8.81e-09 --- ok
|
||||
# 8256 8256 8256 13441.97 ( 83.73) 14693.99 ( 76.59) --- ( --- ) 8.36e-09 --- ok
|
||||
# 9280 9280 9280 13347.32 ( 119.75) 14706.62 ( 108.68) --- ( --- ) 1.13e-08 --- ok
|
||||
# 10304 10304 10304 13291.48 ( 164.62) 14655.85 ( 149.29) --- ( --- ) 1.04e-08 --- ok
|
||||
%{_vpath_builddir}/testing/testing_sgemm
|
||||
%endif
|
||||
|
||||
@@ -245,5 +132,4 @@ make generate
|
||||
%{_libdir}/libmagma_sparse.so
|
||||
|
||||
%changelog
|
||||
* Mon Feb 2 2026 Yifan Xu <xuyifan@iscas.ac.cn> - 2.9.0-1
|
||||
- Import from upstream
|
||||
%autochangelog
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
From a8a1e5626512209fce844a273f55ef4b708c9249 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Sun, 21 Sep 2025 08:24:27 -0700
|
||||
Subject: [PATCH] miopen add link and compile pools
|
||||
|
||||
---
|
||||
CMakeLists.txt | 27 +++++++++++++++++++++++++++
|
||||
1 file changed, 27 insertions(+)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 514eb46ee71a..ea431a93ddc1 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -179,6 +179,33 @@ option(MIOPEN_EMBED_BUILD "Build with the set of embed flags." Off)
|
||||
option(MIOPEN_DISABLE_USERDB "Disable user database access" ${MIOPEN_EMBED_BUILD})
|
||||
option(MIOPEN_BUILD_CK "Build own CK libs inline with MIOpen build" OFF)
|
||||
|
||||
+#
|
||||
+# Separate linking jobs from compiling
|
||||
+# Too many concurrent linking jobs can break the build
|
||||
+# Copied from LLVM
|
||||
+set(MIOPEN_PARALLEL_LINK_JOBS "" CACHE STRING
|
||||
+ "Define the maximum number of concurrent link jobs (Ninja only).")
|
||||
+if(CMAKE_GENERATOR MATCHES "Ninja")
|
||||
+ if(MIOPEN_PARALLEL_LINK_JOBS)
|
||||
+ set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${MIOPEN_PARALLEL_LINK_JOBS})
|
||||
+ set(CMAKE_JOB_POOL_LINK link_job_pool)
|
||||
+ endif()
|
||||
+elseif(MIOPEN_PARALLEL_LINK_JOBS)
|
||||
+ message(WARNING "Job pooling is only available with Ninja generators.")
|
||||
+endif()
|
||||
+
|
||||
+set(MIOPEN_PARALLEL_COMPILE_JOBS "" CACHE STRING
|
||||
+ "Define the maximum number of concurrent compile jobs (Ninja only).")
|
||||
+if(CMAKE_GENERATOR MATCHES "Ninja")
|
||||
+ if(MIOPEN_PARALLEL_COMPILE_JOBS)
|
||||
+ set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${MIOPEN_PARALLEL_COMPILE_JOBS})
|
||||
+ set(CMAKE_JOB_POOL_COMPILE compile_job_pool)
|
||||
+ endif()
|
||||
+elseif(MIOPEN_PARALLEL_COMPILE_JOBS)
|
||||
+ message(WARNING "Job pooling is only available with Ninja generators.")
|
||||
+endif()
|
||||
+
|
||||
+
|
||||
# MIOPEN_USE_HIP_KERNELS is a Workaround for COMgr issues
|
||||
if(MIOPEN_EMBED_BUILD)
|
||||
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build as a shared library" FORCE)
|
||||
--
|
||||
2.51.0
|
||||
|
||||
+89
-299
@@ -1,357 +1,147 @@
|
||||
%bcond gitcommit 0
|
||||
%if %{with gitcommit}
|
||||
%global commit0 2584e35062ad9c2edb68d93c464cf157bc57e3b0
|
||||
%global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
|
||||
%global date0 20250926
|
||||
%endif
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
# Tests require an AMD GPU; keep the bcond for packagers with hardware.
|
||||
%bcond test 0
|
||||
|
||||
%global upstreamname MIOpen
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
%bcond compat 0
|
||||
%if %{with compat}
|
||||
%global pkg_libdir lib
|
||||
%global pkg_prefix %{_prefix}/lib64/rocm/rocm-%{rocm_release}
|
||||
%global pkg_suffix -%{rocm_release}
|
||||
%global pkg_module rocm%{pkg_suffix}
|
||||
%else
|
||||
%global pkg_libdir %{_lib}
|
||||
%global pkg_prefix %{_prefix}
|
||||
%global pkg_suffix %{nil}
|
||||
%global pkg_module default
|
||||
%endif
|
||||
# rocm stack builds with clang
|
||||
%global toolchain clang
|
||||
|
||||
%global miopen_name miopen%{pkg_suffix}
|
||||
|
||||
%global toolchain rocm
|
||||
|
||||
# hipcc does not support some clang flags
|
||||
# build_cxxflags does not honor CMAKE_BUILD_TYPE, strip out -g
|
||||
%global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host -fcf-protection/' -e 's/-g / /' -e 's/-mtls-dialect=gnu2//')
|
||||
|
||||
%bcond debug 0
|
||||
%if %{with debug}
|
||||
%global build_type DEBUG
|
||||
%else
|
||||
%global build_type RelWithDebInfo
|
||||
%endif
|
||||
|
||||
# For testing
|
||||
# hardcoded use of gtest and dirs is not suitable for mock building
|
||||
# Testsuite is not in great shape, fails instead of skips ck tests
|
||||
%bcond test 0
|
||||
%if %{with test}
|
||||
%global build_test ON
|
||||
%else
|
||||
%global build_test OFF
|
||||
%endif
|
||||
|
||||
# Change this to the gpu family you are testing on
|
||||
%bcond check 0
|
||||
%global gpu_test default
|
||||
%if %{with test}
|
||||
%if %{with check}
|
||||
# Do not build everything to do the test on one thing
|
||||
%global rocm_gpu_list %{gpu_test}
|
||||
%endif
|
||||
%endif
|
||||
|
||||
# Needs to match rocblas
|
||||
%bcond tensile 1
|
||||
|
||||
# Compression type and level for source/binary package payloads.
|
||||
# "w7T0.xzdio" xz level 7 using %%{getncpus} threads
|
||||
%global _source_payload w7T0.xzdio
|
||||
%global _binary_payload w7T0.xzdio
|
||||
|
||||
%global gpu_list %{rocm_gpu_list_default}
|
||||
%global _gpu_list gfx1100
|
||||
|
||||
# Use ninja if it is available
|
||||
%bcond ninja 1
|
||||
|
||||
%if %{with ninja}
|
||||
%global cmake_generator -G Ninja
|
||||
%else
|
||||
%global cmake_generator %{nil}
|
||||
%endif
|
||||
|
||||
Name: %{miopen_name}
|
||||
%if %{with gitcommit}
|
||||
Version: git%{date0}.%{shortcommit0}
|
||||
Release: 1%{?dist}
|
||||
%else
|
||||
Name: miopen
|
||||
Version: %{rocm_version}
|
||||
Release: 1%{?dist}
|
||||
%endif
|
||||
Release: %autorelease
|
||||
Summary: AMD's Machine Intelligence Library
|
||||
License: MIT AND BSD-2-Clause AND Apache-2.0 AND %{?fedora:LicenseRef-Fedora-Public-Domain}%{?suse_version:SUSE-Public-Domain}
|
||||
# The base license is MIT with a couple of exceptions
|
||||
# BSD-2-Clause
|
||||
# driver/mloSoftmaxHost.hpp
|
||||
# src/include/miopen/mlo_internal.hpp
|
||||
# Apache-2.0
|
||||
# src/include/miopen/kernel_cache.hpp
|
||||
# src/kernel_cache.cpp
|
||||
# Public Domain
|
||||
# src/md5.cpp
|
||||
License: MIT AND BSD-2-Clause AND Apache-2.0
|
||||
Url: https://github.com/ROCm/MIOpen
|
||||
#!RemoteAsset: sha256:98c72a2b5ca541d6c172facdf0f15729207ab52ca9af36c00e2480c5b27c5b99
|
||||
Source: %{url}/archive/rocm-%{version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
%if %{with gitcommit}
|
||||
Url: https://github.com/ROCm/rocm-libraries
|
||||
Source0: %{url}/archive/%{commit0}/rocm-libraries-%{shortcommit0}.tar.gz
|
||||
# Adds MIOPEN_PARALLEL_{COMPILE,LINK}_JOBS options to limit Ninja job pools
|
||||
# and avoid OOM on memory-constrained build hosts (upstream patch)
|
||||
Patch0: 0001-miopen-add-link-and-compile-pools.patch
|
||||
|
||||
BuildOption(conf): -G Ninja
|
||||
BuildOption(conf): -DGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
BuildOption(conf): -DBoost_USE_STATIC_LIBS=OFF
|
||||
BuildOption(conf): -DMIOPEN_BUILD_DRIVER=OFF
|
||||
BuildOption(conf): -DMIOPEN_ENABLE_AI_IMMED_MODE_FALLBACK=OFF
|
||||
BuildOption(conf): -DMIOPEN_ENABLE_AI_KERNEL_TUNING=OFF
|
||||
%if %{with test}
|
||||
BuildOption(conf): -DBUILD_TESTING=ON
|
||||
BuildOption(conf): -DMIOPEN_TEST_ALL=ON
|
||||
%else
|
||||
Url: https://github.com/ROCm/%{upstreamname}
|
||||
Source0: %{url}/archive/rocm-%{version}.tar.gz#/%{upstreamname}-%{version}.tar.gz
|
||||
BuildOption(conf): -DBUILD_TESTING=OFF
|
||||
%endif
|
||||
|
||||
# So we do not thrash memory
|
||||
Patch1: 0001-miopen-add-link-and-compile-pools.patch
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: pkgconfig(eigen3)
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: fdupes
|
||||
|
||||
BuildRequires: llvm
|
||||
BuildRequires: llvm-devel
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-devel
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: clang-tools-extra-devel
|
||||
BuildRequires: lld
|
||||
BuildRequires: lld-devel
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: rocm-device-libs
|
||||
|
||||
BuildRequires: fplus-devel
|
||||
BuildRequires: frugally-deep-devel
|
||||
BuildRequires: half-devel
|
||||
BuildRequires: pkgconfig(libzstd)
|
||||
BuildRequires: pkgconfig(nlohmann_json)
|
||||
BuildRequires: hipblas%{pkg_suffix}-devel
|
||||
BuildRequires: rocblas%{pkg_suffix}-devel
|
||||
BuildRequires: rocm-cmake%{pkg_suffix}
|
||||
BuildRequires: rocm-comgr%{pkg_suffix}-devel
|
||||
BuildRequires: rocm-llvm%{pkg_suffix}-macros
|
||||
BuildRequires: rocm-hip%{pkg_suffix}-devel
|
||||
BuildRequires: rocr-runtime%{pkg_suffix}-devel
|
||||
#BuildRequires: rocm-rpm-macros%{pkg_suffix}
|
||||
BuildRequires: rocrand%{pkg_suffix}-devel
|
||||
BuildRequires: roctracer%{pkg_suffix}-devel
|
||||
BuildRequires: pkgconfig(sqlite3)
|
||||
BuildRequires: zlib-devel
|
||||
# Disable optional backends not yet packaged on openRuyi
|
||||
BuildOption(conf): -DMIOPEN_USE_COMPOSABLEKERNEL=OFF
|
||||
BuildOption(conf): -DMIOPEN_USE_MLIR=OFF
|
||||
|
||||
BuildRequires: boost-devel
|
||||
BuildRequires: pkgconfig(bzip2)
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hipblaslt)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(rocblas)
|
||||
BuildRequires: cmake(rocrand)
|
||||
%if %{with test}
|
||||
BuildRequires: gmock-devel
|
||||
BuildRequires: gtest-devel
|
||||
BuildRequires: cmake(GTest)
|
||||
%endif
|
||||
|
||||
%if %{with ninja}
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: half
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: hipblas-common-devel
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: ninja
|
||||
%define __builder ninja
|
||||
%endif
|
||||
BuildRequires: pkgconfig(bzip2)
|
||||
BuildRequires: pkgconfig(libzstd)
|
||||
BuildRequires: pkgconfig(nlohmann_json)
|
||||
BuildRequires: pkgconfig(sqlite3)
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-device-libs
|
||||
BuildRequires: rocm-llvm-macros
|
||||
# roctracer uses find_path/find_library rather than find_package; no cmake()/pkgconfig() provided
|
||||
# FIXME
|
||||
BuildRequires: roctracer-devel
|
||||
|
||||
Provides: miopen%{pkg_suffix} = %{version}-%{release}
|
||||
|
||||
# Use ROCm devel at runtime
|
||||
Requires: rocm-hip%{pkg_suffix}-devel
|
||||
Requires: rocrand%{pkg_suffix}-devel
|
||||
# Also needs c++ to work
|
||||
# From pytorch-examples/word_language_model
|
||||
# MIOpen(HIP): Error [BuildHip] HIPRTC status = HIPRTC_ERROR_COMPILATION (6), source file: MIOpenDropoutHIP.cpp
|
||||
# MIOpen(HIP): Warning [BuildHip] In file included from /tmp/comgr-78a343/input/MIOpenDropoutHIP.cpp:32:
|
||||
# In file included from /tmp/comgr-78a343/include/miopen_rocrand.hpp:45:
|
||||
# In file included from /usr/include/rocrand/rocrand_xorwow.h:24:
|
||||
# /usr/include/rocrand/rocrand_common.h:39:10: fatal error: 'utility' file not found
|
||||
# 39 | #include <utility>
|
||||
# | ^~~~~~~~~
|
||||
Requires: cmake(hip)
|
||||
Requires: cmake(rocrand)
|
||||
Requires: gcc-c++
|
||||
|
||||
|
||||
ExclusiveArch: x86_64 riscv64
|
||||
|
||||
%description
|
||||
AMD's library for high performance machine learning primitives.
|
||||
MIOpen supports convolution, batch normalization, activation, pooling,
|
||||
RNN/LSTM/GRU, and attention/transformer operations for the HIP backend.
|
||||
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
Provides: miopen%{pkg_suffix}-devel = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
%description devel
|
||||
%{summary}
|
||||
|
||||
%if %{with test}
|
||||
%package test
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%prep
|
||||
%if %{with gitcommit}
|
||||
%setup -q -n rocm-libraries-%{commit0}
|
||||
cd projects/miopen
|
||||
%patch -P1 -p1
|
||||
%else
|
||||
%autosetup -p1 -n %{upstreamname}-rocm-%{version}
|
||||
%endif
|
||||
|
||||
# Readme has executable bit
|
||||
chmod 644 README.md
|
||||
|
||||
# clang-tidy is brittle and not needed for rebuilding from a tarball
|
||||
%prep -a
|
||||
# clang-tidy is brittle and not needed when rebuilding from a tarball
|
||||
sed -i -e 's@clang-tidy@true@' cmake/ClangTidy.cmake
|
||||
|
||||
# workaround error on finding lbunzip2
|
||||
sed -i -e 's@lbunzip2 bunzip2@bunzip2@' CMakeLists.txt
|
||||
|
||||
# https://github.com/ROCm/MIOpen/issues/2672
|
||||
sed -i -e 's@find_path(HALF_INCLUDE_DIR half/half.hpp)@#find_path(HALF_INCLUDE_DIR half/half.hpp)@' CMakeLists.txt
|
||||
# #include <half/half.hpp> -> <half.hpp>
|
||||
for f in `find . -type f -name '*.hpp' -o -name '*.cpp' `; do
|
||||
sed -i -e 's@#include <half/half.hpp>@#include <half.hpp>@' $f
|
||||
done
|
||||
# On 6.4.0
|
||||
# ../test/verify.hpp:198:56: error: no member named 'expr' in namespace 'half_float::detail'
|
||||
# 198 | if constexpr(std::is_same_v<T, half_float::detail::expr>)
|
||||
# This is not our float, hack it out
|
||||
# half_float::detail::expr is not present in all half versions
|
||||
sed -i -e 's@std::is_same_v<T, half_float::detail::expr>@0@' test/verify.hpp
|
||||
|
||||
# Tries to download its own googletest
|
||||
# No good knob to turn it off so hack the cmake
|
||||
# MIOpen tries to download googletest; disable when not needed
|
||||
%if %{without test}
|
||||
sed -i -e 's@add_subdirectory(test)@#add_subdirectory(test)@' CMakeLists.txt
|
||||
sed -i -e 's@add_subdirectory(speedtests)@#add_subdirectory(speedtests)@' CMakeLists.txt
|
||||
%endif
|
||||
|
||||
%if %{without tensile}
|
||||
sed -i -e 's@#define ROCBLAS_BETA_FEATURES_API 1@#define ROCBLAS_BETA_FEATURES_API 0@' src/include/miopen/handle.hpp
|
||||
sed -i -e 's@#define ROCBLAS_BETA_FEATURES_API 1@#define ROCBLAS_BETA_FEATURES_API 0@' src/solver/mha/mha_common.hpp
|
||||
sed -i -e 's@#define ROCBLAS_BETA_FEATURES_API 1@#define ROCBLAS_BETA_FEATURES_API 0@' src/gemm_v2.cpp
|
||||
%endif
|
||||
# Use the standard data directory for the MIOpen kernel database
|
||||
sed -i -e 's@GetLibPath().parent_path() / "share/miopen/db"@"%{_datadir}/miopen/db"@' src/db_path.cpp.in
|
||||
|
||||
# Our use of modules confuses install locations
|
||||
# The db is not installed relative to the lib dir.
|
||||
# Hardcode its location
|
||||
sed -i -e 's@GetLibPath().parent_path() / "share/miopen/db"@"/usr/share/miopen/db"@' src/db_path.cpp.in
|
||||
|
||||
# Unsupported compiler flags
|
||||
# -fno-offload-uniform-block is unsupported on this ROCm version
|
||||
sed -i -e 's@opts.push_back("-fno-offload-uniform-block");@//opts.push_back("-fno-offload-uniform-block");@' src/comgr.cpp
|
||||
|
||||
# Paths to clang
|
||||
# Fix the path used to locate the ROCm clang binary at build time
|
||||
sed -i -e 's@llvm/bin/clang@bin/clang@' src/hip/hip_build_utils.cpp
|
||||
|
||||
%build
|
||||
%if %{with gitcommit}
|
||||
cd projects/miopen
|
||||
%endif
|
||||
|
||||
|
||||
# Real cores, No hyperthreading
|
||||
COMPILE_JOBS=`cat /proc/cpuinfo | grep -m 1 'cpu cores' | awk '{ print $4 }'`
|
||||
if [ ${COMPILE_JOBS}x = x ]; then
|
||||
COMPILE_JOBS=1
|
||||
fi
|
||||
# Take into account memory usage per core, do not thrash real memory
|
||||
BUILD_MEM=4
|
||||
MEM_KB=0
|
||||
MEM_KB=`cat /proc/meminfo | grep MemTotal | awk '{ print $2 }'`
|
||||
MEM_MB=`eval "expr ${MEM_KB} / 1024"`
|
||||
MEM_GB=`eval "expr ${MEM_MB} / 1024"`
|
||||
COMPILE_JOBS_MEM=`eval "expr 1 + ${MEM_GB} / ${BUILD_MEM}"`
|
||||
if [ "$COMPILE_JOBS_MEM" -lt "$COMPILE_JOBS" ]; then
|
||||
COMPILE_JOBS=$COMPILE_JOBS_MEM
|
||||
fi
|
||||
LINK_MEM=32
|
||||
LINK_JOBS=`eval "expr 1 + ${MEM_GB} / ${LINK_MEM}"`
|
||||
|
||||
#%{?suse_version:%{?build_cxxflags:CXXFLAGS="%{build_cxxflags}"}}
|
||||
|
||||
%cmake %{cmake_generator} \
|
||||
-DCMAKE_C_COMPILER=%rocmllvm_bindir/clang \
|
||||
-DCMAKE_CXX_COMPILER=%rocmllvm_bindir/clang++ \
|
||||
-DCMAKE_INSTALL_LIBDIR=%{pkg_libdir} \
|
||||
-DCMAKE_INSTALL_PREFIX=%{pkg_prefix} \
|
||||
-DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
|
||||
-DROCM_SYMLINK_LIBS=OFF \
|
||||
-DHIP_PLATFORM=amd \
|
||||
-DGPU_TARGETS=%{gpu_list} \
|
||||
-DBUILD_TESTING=%{build_test} \
|
||||
%{?build_type:-DCMAKE_BUILD_TYPE=%{build_type}} \
|
||||
-DCMAKE_SKIP_RPATH=ON \
|
||||
-DBoost_USE_STATIC_LIBS=OFF \
|
||||
-DMIOPEN_PARALLEL_COMPILE_JOBS=$COMPILE_JOBS \
|
||||
-DMIOPEN_PARALLEL_LINK_JOBS=$LINK_JOBS \
|
||||
-DMIOPEN_BACKEND=HIP \
|
||||
-DMIOPEN_BUILD_DRIVER=OFF \
|
||||
-DMIOPEN_ENABLE_AI_IMMED_MODE_FALLBACK=OFF \
|
||||
-DMIOPEN_ENABLE_AI_KERNEL_TUNING=OFF \
|
||||
-DMIOPEN_TEST_ALL=%{build_test} \
|
||||
-DMIOPEN_USE_HIPBLASLT=OFF \
|
||||
-DMIOPEN_USE_MLIR=OFF \
|
||||
-DMIOPEN_USE_COMPOSABLEKERNEL=OFF
|
||||
|
||||
%cmake_build
|
||||
|
||||
%if %{with test}
|
||||
%cmake_build -t tests
|
||||
%endif
|
||||
|
||||
%if %{with test}
|
||||
%if %{with check}
|
||||
%check
|
||||
find . -name 'libMIOpen.so.1'
|
||||
export LD_LIBRARY_PATH=${PWD}/%{_vpath_builddir}/lib:$LD_LIBRARY_PATH
|
||||
%ctest
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%install
|
||||
%if %{with gitcommit}
|
||||
cd projects/miopen
|
||||
%endif
|
||||
%cmake_install
|
||||
|
||||
# Extra license
|
||||
rm -f %{buildroot}%{pkg_prefix}/share/doc/miopen-hip/LICENSE.md
|
||||
|
||||
%fdupes %{buildroot}%{pkg_prefix}
|
||||
|
||||
%post -p /sbin/ldconfig
|
||||
%postun -p /sbin/ldconfig
|
||||
%install -a
|
||||
rm -f %{buildroot}%{_datadir}/doc/miopen-hip/LICENSE.md
|
||||
|
||||
%files
|
||||
%if %{with gitcommit}
|
||||
%doc projects/miopen/README.md
|
||||
%license projects/miopen/LICENSE.md
|
||||
%else
|
||||
%doc README.md
|
||||
%license LICENSE.md
|
||||
%endif
|
||||
|
||||
%{pkg_prefix}/%{pkg_libdir}/libMIOpen.so.1{,.*}
|
||||
%{pkg_prefix}/libexec/miopen/
|
||||
%{_libdir}/libMIOpen.so.1{,.*}
|
||||
%{_libexecdir}/miopen/
|
||||
|
||||
%files devel
|
||||
%{pkg_prefix}/share/miopen/
|
||||
%{pkg_prefix}/include/miopen/
|
||||
%{pkg_prefix}/%{pkg_libdir}/libMIOpen.so
|
||||
%{pkg_prefix}/%{pkg_libdir}/cmake/miopen/
|
||||
%{_datadir}/miopen/
|
||||
%{_includedir}/miopen/
|
||||
%{_libdir}/cmake/miopen/
|
||||
%{_libdir}/libMIOpen.so
|
||||
|
||||
%if %{with test}
|
||||
%files test
|
||||
%{pkg_prefix}/bin/test*
|
||||
%{_bindir}/test*
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Mon Feb 2 2026 Yifan Xu <xuyifan@iscas.ac.cn> - 7.1.1-1
|
||||
- Import from upstream
|
||||
%autochangelog
|
||||
|
||||
@@ -1,61 +0,0 @@
|
||||
From 53d2ea9ad3cc20e1beac2e1c014082c25e221182 Mon Sep 17 00:00:00 2001
|
||||
From: Takatoshi Kondo <redboltz@gmail.com>
|
||||
Date: Sun, 26 Aug 2018 10:58:47 +0900
|
||||
Subject: [PATCH] Fixed #724.
|
||||
|
||||
Fixed type mismatch in msgpack_timestamp.
|
||||
Added 64bit singed postfix.
|
||||
---
|
||||
include/msgpack/timestamp.h | 8 ++++++--
|
||||
include/msgpack/v1/adaptor/cpp11/chrono.hpp | 4 ++--
|
||||
2 files changed, 8 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/include/msgpack/timestamp.h b/include/msgpack/timestamp.h
|
||||
index 4d7df83d..76139312 100644
|
||||
--- a/include/msgpack/timestamp.h
|
||||
+++ b/include/msgpack/timestamp.h
|
||||
@@ -28,13 +28,17 @@ static inline bool msgpack_object_to_timestamp(const msgpack_object* obj, msgpac
|
||||
switch (obj->via.ext.size) {
|
||||
case 4:
|
||||
ts->tv_nsec = 0;
|
||||
- _msgpack_load32(uint32_t, obj->via.ext.ptr, &ts->tv_sec);
|
||||
+ {
|
||||
+ uint32_t v;
|
||||
+ _msgpack_load32(uint32_t, obj->via.ext.ptr, &v);
|
||||
+ ts->tv_sec = v;
|
||||
+ }
|
||||
return true;
|
||||
case 8: {
|
||||
uint64_t value;
|
||||
_msgpack_load64(uint64_t, obj->via.ext.ptr, &value);
|
||||
ts->tv_nsec = (uint32_t)(value >> 34);
|
||||
- ts->tv_sec = value & 0x00000003ffffffffL;
|
||||
+ ts->tv_sec = value & 0x00000003ffffffffLL;
|
||||
return true;
|
||||
}
|
||||
case 12:
|
||||
diff --git a/include/msgpack/v1/adaptor/cpp11/chrono.hpp b/include/msgpack/v1/adaptor/cpp11/chrono.hpp
|
||||
index 1e08355e..db2035b7 100644
|
||||
--- a/include/msgpack/v1/adaptor/cpp11/chrono.hpp
|
||||
+++ b/include/msgpack/v1/adaptor/cpp11/chrono.hpp
|
||||
@@ -41,7 +41,7 @@ struct as<std::chrono::system_clock::time_point> {
|
||||
uint64_t value;
|
||||
_msgpack_load64(uint64_t, o.via.ext.data(), &value);
|
||||
uint32_t nanosec = static_cast<uint32_t>(value >> 34);
|
||||
- uint64_t sec = value & 0x00000003ffffffffL;
|
||||
+ uint64_t sec = value & 0x00000003ffffffffLL;
|
||||
tp += std::chrono::duration_cast<std::chrono::system_clock::duration>(
|
||||
std::chrono::nanoseconds(nanosec));
|
||||
tp += std::chrono::seconds(sec);
|
||||
@@ -79,7 +79,7 @@ struct convert<std::chrono::system_clock::time_point> {
|
||||
uint64_t value;
|
||||
_msgpack_load64(uint64_t, o.via.ext.data(), &value);
|
||||
uint32_t nanosec = static_cast<uint32_t>(value >> 34);
|
||||
- uint64_t sec = value & 0x00000003ffffffffL;
|
||||
+ uint64_t sec = value & 0x00000003ffffffffLL;
|
||||
tp += std::chrono::duration_cast<std::chrono::system_clock::duration>(
|
||||
std::chrono::nanoseconds(nanosec));
|
||||
tp += std::chrono::seconds(sec);
|
||||
--
|
||||
2.17.1
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
From 232fff18d4f07aa25338da88ce704675f9fea465 Mon Sep 17 00:00:00 2001
|
||||
From: Takatoshi Kondo <redboltz@gmail.com>
|
||||
Date: Tue, 6 Aug 2024 09:36:04 +0900
|
||||
Subject: [PATCH 1/5] Fixed cmake warnings.
|
||||
|
||||
---
|
||||
CMakeLists.txt | 20 +++++++++++++++++---
|
||||
1 file changed, 17 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 8dc6d610a..c75c908f9 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -1,8 +1,7 @@
|
||||
-CMAKE_MINIMUM_REQUIRED (VERSION 2.8.12)
|
||||
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
|
||||
|
||||
-IF ((CMAKE_VERSION VERSION_GREATER 3.1) OR
|
||||
- (CMAKE_VERSION VERSION_EQUAL 3.1))
|
||||
- CMAKE_POLICY(SET CMP0054 NEW)
|
||||
+IF (MSGPACK_USE_BOOST)
|
||||
+ CMAKE_POLICY(SET CMP0167 NEW)
|
||||
ENDIF ()
|
||||
|
||||
PROJECT (msgpack)
|
||||
@@ -285,7 +285,6 @@
|
||||
# MEMORYCHECK_COMMAND_OPTIONS needs to place prior to CTEST_MEMORYCHECK_COMMAND
|
||||
SET (MEMORYCHECK_COMMAND_OPTIONS "--leak-check=full --show-leak-kinds=definite,possible --error-exitcode=1")
|
||||
FIND_PROGRAM(CTEST_MEMORYCHECK_COMMAND NAMES valgrind)
|
||||
- INCLUDE(Dart)
|
||||
ADD_SUBDIRECTORY (test)
|
||||
ENDIF ()
|
||||
|
||||
diff --git a/include/msgpack/type.hpp b/include/msgpack/type.hpp
|
||||
index 1ab49745f..9ef3e86d3 100644
|
||||
--- a/include/msgpack/type.hpp
|
||||
+++ b/include/msgpack/type.hpp
|
||||
@@ -60,7 +60,9 @@
|
||||
#if defined(MSGPACK_USE_BOOST)
|
||||
|
||||
#include "adaptor/boost/fusion.hpp"
|
||||
+#if !defined(MSGPACK_USE_CPP03)
|
||||
#include "adaptor/boost/msgpack_variant.hpp"
|
||||
+#endif // !defined(MSGPACK_USE_CPP03)
|
||||
#include "adaptor/boost/optional.hpp"
|
||||
#include "adaptor/boost/string_ref.hpp"
|
||||
#include "adaptor/boost/string_view.hpp"
|
||||
@@ -1,63 +0,0 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
Name: msgpack
|
||||
Version: 3.1.0
|
||||
Release: %autorelease
|
||||
Summary: Binary-based efficient object serialization library
|
||||
License: BSL-1.0
|
||||
URL: http://msgpack.org
|
||||
#!RemoteAsset
|
||||
Source0: https://github.com/msgpack/msgpack-c/releases/download/cpp-%{version}/%{name}-%{version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
BuildOption(conf): -DCMAKE_POLICY_VERSION_MINIMUM=3.5
|
||||
|
||||
# https://github.com/msgpack/msgpack-c/commit/53d2ea9ad3cc20e1beac2e1c014082c25e221182
|
||||
Patch0: 0001-Fixed-724.patch
|
||||
Patch1: 0002-msgpack-cmake4.patch
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: doxygen
|
||||
# for %%check
|
||||
BuildRequires: pkgconfig(gtest)
|
||||
BuildRequires: pkgconfig(zlib)
|
||||
|
||||
%description
|
||||
MessagePack is a binary-based efficient object serialization
|
||||
library. It enables to exchange structured objects between many
|
||||
languages like JSON. But unlike JSON, it is very fast and small.
|
||||
|
||||
%package devel
|
||||
Summary: Libraries and header files for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
Libraries and header files for %{name}
|
||||
|
||||
%prep -a
|
||||
# gtest 1.17.0 requires at least C++17
|
||||
sed -i "s|-std=c++98|-std=gnu++17|g" CMakeLists.txt
|
||||
|
||||
%check -p
|
||||
# https://github.com/msgpack/msgpack-c/issues/697
|
||||
export GTEST_FILTER=-object_with_zone.ext_empty
|
||||
|
||||
%files
|
||||
%license LICENSE_1_0.txt COPYING
|
||||
%doc AUTHORS ChangeLog NOTICE README README.md
|
||||
%{_libdir}/*.so.*
|
||||
|
||||
%files devel
|
||||
%{_includedir}/*
|
||||
%{_libdir}/*.so
|
||||
%{_libdir}/pkgconfig/msgpack.pc
|
||||
%{_libdir}/cmake/msgpack
|
||||
|
||||
%changelog
|
||||
%autochangelog
|
||||
@@ -0,0 +1,22 @@
|
||||
From b976f614ed2ce3bf98f15e9a93761aafe15ba5a9 Mon Sep 17 00:00:00 2001
|
||||
From: Sakura286 <chenxuan@iscas.ac.cn>
|
||||
Date: Mon, 16 Mar 2026 15:26:02 +0800
|
||||
Subject: [PATCH] disable httpmuxgo121 on newer version of go
|
||||
|
||||
---
|
||||
main.go | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/main.go b/main.go
|
||||
index 650e03a..9a343f3 100644
|
||||
--- a/main.go
|
||||
+++ b/main.go
|
||||
@@ -1,3 +1,5 @@
|
||||
+//go:debug httpmuxgo121=0
|
||||
+
|
||||
package main
|
||||
|
||||
import (
|
||||
--
|
||||
2.53.0
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
From eb50883178cae3a721ca8658dde6988ee22c8918 Mon Sep 17 00:00:00 2001
|
||||
From: Sakura286 <chenxuan@iscas.ac.cn>
|
||||
Date: Mon, 16 Mar 2026 15:45:18 +0800
|
||||
Subject: [PATCH] use lib64 instead of lib
|
||||
|
||||
---
|
||||
CMakeLists.txt | 4 ++--
|
||||
discover/types.go | 2 +-
|
||||
ml/backend/ggml/ggml/src/ggml.go | 2 +-
|
||||
ml/path.go | 2 +-
|
||||
4 files changed, 5 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 1aa976a..4bd9250 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -37,8 +37,8 @@ if (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
|
||||
set(CMAKE_INSTALL_RPATH "@loader_path")
|
||||
endif()
|
||||
|
||||
-set(OLLAMA_BUILD_DIR ${CMAKE_BINARY_DIR}/lib/ollama)
|
||||
-set(OLLAMA_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/lib/ollama/${OLLAMA_RUNNER_DIR})
|
||||
+set(OLLAMA_BUILD_DIR ${CMAKE_BINARY_DIR}/lib64/ollama)
|
||||
+set(OLLAMA_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/lib64/ollama/${OLLAMA_RUNNER_DIR})
|
||||
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OLLAMA_BUILD_DIR})
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${OLLAMA_BUILD_DIR})
|
||||
diff --git a/discover/types.go b/discover/types.go
|
||||
index efc69ec..5848ea4 100644
|
||||
--- a/discover/types.go
|
||||
+++ b/discover/types.go
|
||||
@@ -31,7 +31,7 @@ func LogDetails(devices []ml.DeviceInfo) {
|
||||
for _, dev := range devices {
|
||||
var libs []string
|
||||
for _, dir := range dev.LibraryPath {
|
||||
- if strings.Contains(dir, filepath.Join("lib", "ollama")) {
|
||||
+ if strings.Contains(dir, filepath.Join("lib64", "ollama")) {
|
||||
libs = append(libs, filepath.Base(dir))
|
||||
}
|
||||
}
|
||||
diff --git a/ml/backend/ggml/ggml/src/ggml.go b/ml/backend/ggml/ggml/src/ggml.go
|
||||
index 7e21591..23f58a1 100644
|
||||
--- a/ml/backend/ggml/ggml/src/ggml.go
|
||||
+++ b/ml/backend/ggml/ggml/src/ggml.go
|
||||
@@ -65,7 +65,7 @@ var OnceLoad = sync.OnceFunc(func() {
|
||||
case "windows":
|
||||
value = filepath.Join(filepath.Dir(exe), "lib", "ollama")
|
||||
default:
|
||||
- value = filepath.Join(filepath.Dir(exe), "..", "lib", "ollama")
|
||||
+ value = filepath.Join(filepath.Dir(exe), "..", "lib64", "ollama")
|
||||
}
|
||||
|
||||
// Avoid potentially loading incompatible GGML libraries
|
||||
diff --git a/ml/path.go b/ml/path.go
|
||||
index ac93af4..3af726c 100644
|
||||
--- a/ml/path.go
|
||||
+++ b/ml/path.go
|
||||
@@ -28,7 +28,7 @@ var LibOllamaPath string = func() string {
|
||||
case "windows":
|
||||
libPath = filepath.Join(filepath.Dir(exe), "lib", "ollama")
|
||||
case "linux":
|
||||
- libPath = filepath.Join(filepath.Dir(exe), "..", "lib", "ollama")
|
||||
+ libPath = filepath.Join(filepath.Dir(exe), "..", "lib64", "ollama")
|
||||
case "darwin":
|
||||
libPath = filepath.Dir(exe)
|
||||
}
|
||||
--
|
||||
2.53.0
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 2820dee..44e0b43 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -28,7 +28,7 @@ set(GGML_CUDA_FA ON)
|
||||
set(GGML_CUDA_COMPRESSION_MODE default)
|
||||
|
||||
if((CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
|
||||
- OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+"))
|
||||
+ OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+|riscv64"))
|
||||
set(GGML_CPU_ALL_VARIANTS ON)
|
||||
endif()
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
From 85aee3e710288343eca393272a418483ac547b83 Mon Sep 17 00:00:00 2001
|
||||
From: Sakura286 <chenxuan@iscas.ac.cn>
|
||||
Date: Tue, 24 Mar 2026 10:11:07 +0800
|
||||
Subject: [PATCH] limit batch size to stablize
|
||||
|
||||
---
|
||||
api/types.go | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/api/types.go b/api/types.go
|
||||
index 3ccf3ce..cba3ffd 100644
|
||||
--- a/api/types.go
|
||||
+++ b/api/types.go
|
||||
@@ -896,7 +896,7 @@ func DefaultOptions() Options {
|
||||
Runner: Runner{
|
||||
// options set when the model is loaded
|
||||
NumCtx: int(envconfig.ContextLength()),
|
||||
- NumBatch: 512,
|
||||
+ NumBatch: 8,
|
||||
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
|
||||
NumThread: 0, // let the runtime decide
|
||||
UseMMap: nil,
|
||||
--
|
||||
2.53.0
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
[Unit]
|
||||
Description=Ollama Service
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
ExecStart=/usr/bin/ollama serve
|
||||
User=ollama
|
||||
Group=ollama
|
||||
Restart=always
|
||||
RestartSec=3
|
||||
|
||||
[Install]
|
||||
WantedBy=default.target
|
||||
+81
-23
@@ -1,6 +1,7 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: misaka00251 <liuxin@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Sakura286 <chenxuan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
@@ -11,24 +12,24 @@
|
||||
|
||||
# Ollama bundles some ggml libs
|
||||
# They should be kept private and the scans of these files should be disabled
|
||||
%global __provides_exclude_from ^%{_exec_prefix}/lib/ollama/.*\\.so(\\..*)?$
|
||||
%global __requires_exclude ^libggml-base\\.so\\.0\\(\\).*
|
||||
%global __provides_exclude lib.*\\.so(\\..*)?
|
||||
%global __requires_exclude libggml-.*\\.so(\\..*)?
|
||||
|
||||
Name: ollama
|
||||
Version: 0.13.5
|
||||
Release: %autorelease
|
||||
Summary: Get up and running with OpenAI gpt-oss, DeepSeek-R1, Gemma 3 and other models.
|
||||
License: Apache-2.0 AND MIT
|
||||
URL: https://github.com/ollama/ollama
|
||||
#!RemoteAsset
|
||||
Source0: %{url}/archive/refs/tags/v%{version}.tar.gz
|
||||
License: MIT
|
||||
URL: https://ollama.com/
|
||||
VCS: git:https://github.com/ollama/ollama
|
||||
#!RemoteAsset: sha256:6b6bc20a52c11341aa296eecce5ee6782f05815224a4196983b0aa2f1453c19f
|
||||
Source0: https://github.com/ollama/ollama/archive/refs/tags/v%{version}.tar.gz
|
||||
Source1: ollama.service
|
||||
Source2: ollama.sysusers
|
||||
BuildSystem: golang
|
||||
|
||||
BuildOption(prep): -n %{_name}-%{version}
|
||||
|
||||
Patch0: 0001-ollama-0.14.2_add-riscv.patch
|
||||
Patch1: 0002-go-riscv64.patch
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: fdupes
|
||||
BuildRequires: gcc-c++
|
||||
@@ -60,12 +61,15 @@ BuildRequires: go(golang.org/x/tools)
|
||||
BuildRequires: go(gonum.org/v1/gonum)
|
||||
BuildRequires: go(google.golang.org/protobuf)
|
||||
BuildRequires: ninja
|
||||
|
||||
BuildRequires: systemd-rpm-macros
|
||||
%if %{with rocm}
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(Clang)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hipblas)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(LLD)
|
||||
BuildRequires: cmake(LLVM)
|
||||
BuildRequires: cmake(rocblas)
|
||||
BuildRequires: cmake(rocsolver)
|
||||
BuildRequires: pkgconfig(libdrm_amdgpu)
|
||||
@@ -73,17 +77,37 @@ BuildRequires: pkgconfig(libelf)
|
||||
BuildRequires: pkgconfig(numa)
|
||||
BuildRequires: rocm-llvm-macros
|
||||
BuildRequires: rocminfo
|
||||
BuildRequires: clang-devel
|
||||
BuildRequires: clang-tools-extra-devel
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: lld-devel
|
||||
BuildRequires: llvm-devel
|
||||
%endif
|
||||
|
||||
%{?systemd_requires}
|
||||
%if %{with rocm}
|
||||
Requires: hipblas
|
||||
Requires: rocblas
|
||||
%endif
|
||||
|
||||
%patchlist
|
||||
# Ollama vendors ggml code, but it does not sync riscv64 code by default
|
||||
# Manually sync riscv64 code here
|
||||
0001-ollama-0.14.2_add-riscv.patch
|
||||
# Ollama puts the ggml-cpu code (C++) inside the 'ollama' binary (Go)
|
||||
0002-go-riscv64.patch
|
||||
# The Golang buildsystem on openRuyi uses GO111MODULE=off, which enables
|
||||
# httpmuxgo121=1, which is deprecated in newer versions of Go
|
||||
# Without this patch, ollama cannot provide even the basic http functions
|
||||
# https://github.com/jkroepke/openvpn-auth-oauth2/pull/706
|
||||
0003-disable-httpmuxgo121-on-newer-version-of-go.patch
|
||||
# This patch breaks dlopen of ollama, temporarily disable it
|
||||
# Install ollama to /usr/lib as workaround
|
||||
# 0004-use-lib64-instead-of-lib.patch
|
||||
# GGML_CPU_ALL_VARIANTS only supports x86_64
|
||||
0005-disable-cpu-variants.patch
|
||||
# llama.cpp (ggml) with ROCm on riscv64 frequently produces nonsense output
|
||||
# Passing the parameters '-b 8 -ub 8' stabilizes it
|
||||
0006-limit-batch-size-to-stabilize.patch
|
||||
|
||||
%description
|
||||
Ollama is an open-source platform designed to run large language models locally.
|
||||
It allows users to generate text, assist with coding, and create content privately
|
||||
@@ -95,28 +119,62 @@ rm -rf llama/llama.cpp/vendor
|
||||
|
||||
# Ollama uses a mixed build of cmake and go.
|
||||
# Ollama binary built by go will use dlopen to load *.so built by cmake.
|
||||
# Building order is not important.
|
||||
# Building order of go/cmake is not important.
|
||||
%build -a
|
||||
cmake \
|
||||
-B build \
|
||||
%if %{with rocm}
|
||||
-DCMAKE_HIP_COMPILER=%rocmllvm_bindir/clang++ \
|
||||
-DAMDGPU_TARGETS=%{rocm_gpu_list_default} \
|
||||
%endif
|
||||
%cmake \
|
||||
-G Ninja \
|
||||
-W no-dev
|
||||
cmake --build build --parallel
|
||||
-W no-dev \
|
||||
-DCMAKE_INSTALL_LIBDIR:PATH=lib \
|
||||
-DCMAKE_INSTALL_FULL_LIBDIR:PATH=/usr/lib \
|
||||
-DLIB_INSTALL_DIR:PATH=/usr/lib \
|
||||
-DLIB_SUFFIX= \
|
||||
%if %{with rocm}
|
||||
-DCMAKE_HIP_COMPILER=%{rocmllvm_bindir}/clang++ \
|
||||
-DAMDGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
%endif
|
||||
%cmake_build
|
||||
|
||||
%install
|
||||
%buildsystem_golang_install
|
||||
%cmake_install
|
||||
# Remove bundled contents
|
||||
rm -rvf %{buildroot}%{_bindir}/lib* \
|
||||
%{buildroot}%{_exec_prefix}/lib/ollama/libamd* \
|
||||
%{buildroot}%{_exec_prefix}/lib/ollama/libdrm* \
|
||||
%{buildroot}%{_exec_prefix}/lib/ollama/libelf* \
|
||||
%{buildroot}%{_exec_prefix}/lib/ollama/libhip* \
|
||||
%{buildroot}%{_exec_prefix}/lib/ollama/libhsa* \
|
||||
%{buildroot}%{_exec_prefix}/lib/ollama/libnuma* \
|
||||
%{buildroot}%{_exec_prefix}/lib/ollama/libroc* \
|
||||
%{buildroot}%{_exec_prefix}/lib/ollama/libroc* \
|
||||
%{buildroot}%{_exec_prefix}/lib/ollama/rocblas/
|
||||
|
||||
install -p -D -m 0644 %{SOURCE1} %{buildroot}%{_unitdir}/ollama.service
|
||||
install -p -D -m 0644 %{SOURCE2} %{buildroot}%{_sysusersdir}/ollama.conf
|
||||
# home dir
|
||||
mkdir -p %{buildroot}%{_var}/lib/ollama
|
||||
|
||||
%pre
|
||||
%sysusers_create_package ollama %{SOURCE2}
|
||||
|
||||
%preun
|
||||
%systemd_preun ollama.service
|
||||
|
||||
%post
|
||||
%systemd_post ollama.service
|
||||
|
||||
%postun
|
||||
%systemd_postun_with_restart ollama.service
|
||||
|
||||
%files
|
||||
%license LICENSE*
|
||||
%doc README*
|
||||
%{_bindir}/%{_name}
|
||||
%dir %{_exec_prefix}/lib/ollama
|
||||
%attr(0755,ollama,ollama) %dir %{_var}/lib/ollama/
|
||||
%{_bindir}/ollama
|
||||
%{_exec_prefix}/lib/ollama/*
|
||||
%{_unitdir}/ollama.service
|
||||
%{_sysusersdir}/ollama.conf
|
||||
|
||||
%changelog
|
||||
%{?autochangelog}
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
u ollama - "Runs Ollama" /var/lib/ollama /sbin/nologin
|
||||
@@ -0,0 +1,56 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%global srcname mistral-common
|
||||
%global pypi_name mistral_common
|
||||
|
||||
Name: python-%{srcname}
|
||||
Version: 1.11.2
|
||||
Release: %autorelease
|
||||
Summary: Library of common utilities for Mistral AI
|
||||
License: Apache-2.0
|
||||
URL: https://github.com/mistralai/mistral-common
|
||||
#!RemoteAsset: sha256:79f68fc2d1190f28637f40e053f919c8c2697e00b2aa679ddee562a95183f4ad
|
||||
Source0: https://files.pythonhosted.org/packages/source/m/%{pypi_name}/%{pypi_name}-%{version}.tar.gz
|
||||
# Upstream does not ship the license file in the sdist, fetch it separately
|
||||
#!RemoteAsset: sha256:5ed6f79e77734b5a60740dd821af5ecac9a6f33709c860eea4e20fcb6cca7fcc
|
||||
Source1: https://raw.githubusercontent.com/mistralai/mistral-common/v%{version}/LICENCE
|
||||
BuildArch: noarch
|
||||
BuildSystem: pyproject
|
||||
|
||||
BuildOption(install): %{pypi_name}
|
||||
# These modules require the optional "server" extra (fastapi, click, pydantic-settings)
|
||||
BuildOption(check): -e 'mistral_common.experimental.app.*'
|
||||
|
||||
BuildRequires: pyproject-rpm-macros
|
||||
BuildRequires: pkgconfig(python3)
|
||||
BuildRequires: python3dist(pip)
|
||||
BuildRequires: python3dist(setuptools)
|
||||
BuildRequires: python3dist(wheel)
|
||||
|
||||
Provides: python3-%{srcname} = %{version}-%{release}
|
||||
%python_provide python3-%{srcname}
|
||||
|
||||
%description
|
||||
mistral-common is a library of common utilities for Mistral AI, providing
|
||||
tokenizers, request and response schemas, and validation helpers used across
|
||||
Mistral's models and tooling.
|
||||
|
||||
%prep -a
|
||||
cp -p %{SOURCE1} LICENCE
|
||||
# Relax jsonschema lower bound to match the version available in the repo
|
||||
sed -i 's/jsonschema>=4.21.1/jsonschema>=4.17.3/' pyproject.toml
|
||||
|
||||
%generate_buildrequires
|
||||
%pyproject_buildrequires
|
||||
|
||||
%files -f %{pyproject_files}
|
||||
%doc README.md
|
||||
%license LICENCE
|
||||
%{_bindir}/mistral_common
|
||||
|
||||
%changelog
|
||||
%autochangelog
|
||||
@@ -0,0 +1,48 @@
|
||||
# SPDX-FileCopyrightText: (C) 2025, 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2025, 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%global srcname msgpack
|
||||
|
||||
Name: python-%{srcname}
|
||||
Version: 1.1.2
|
||||
Release: %autorelease
|
||||
Summary: Python MessagePack (de)serializer
|
||||
License: Apache-2.0
|
||||
URL: https://msgpack.org/
|
||||
#!RemoteAsset: sha256:3b60763c1373dd60f398488069bcdc703cd08a711477b5d480eecc9f9626f47e
|
||||
Source0: https://files.pythonhosted.org/packages/source/m/%{srcname}/%{srcname}-%{version}.tar.gz
|
||||
BuildSystem: pyproject
|
||||
|
||||
BuildOption(install): -l %{srcname}
|
||||
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: pyproject-rpm-macros
|
||||
BuildRequires: pkgconfig(python3)
|
||||
BuildRequires: python3dist(pip)
|
||||
BuildRequires: python3dist(setuptools)
|
||||
|
||||
Provides: python3-%{srcname} = %{version}-%{release}
|
||||
Provides: python3-%{srcname}%{?_isa} = %{version}-%{release}
|
||||
%python_provide python3-%{srcname}
|
||||
|
||||
%description
|
||||
MessagePack is a binary-based efficient data interchange format that is
|
||||
focused on high performance. It is like JSON, but very fast and small.
|
||||
This is a Python (de)serializer for MessagePack.
|
||||
|
||||
%prep -a
|
||||
# There is a circular dependency with python-msgpack-ext
|
||||
rm -rf test/test_timestamp.py
|
||||
|
||||
%generate_buildrequires
|
||||
%pyproject_buildrequires
|
||||
|
||||
%files -f %{pyproject_files}
|
||||
%doc README.md
|
||||
%license COPYING
|
||||
|
||||
%changelog
|
||||
%autochangelog
|
||||
@@ -0,0 +1,54 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%global srcname pydantic-extra-types
|
||||
%global pypi_name pydantic_extra_types
|
||||
|
||||
Name: python-%{srcname}
|
||||
Version: 2.11.1
|
||||
Release: %autorelease
|
||||
Summary: Extra Pydantic types
|
||||
License: MIT
|
||||
URL: https://github.com/pydantic/pydantic-extra-types
|
||||
#!RemoteAsset: sha256:46792d2307383859e923d8fcefa82108b1a141f8a9c0198982b3832ab5ef1049
|
||||
Source0: https://files.pythonhosted.org/packages/source/p/%{pypi_name}/%{pypi_name}-%{version}.tar.gz
|
||||
BuildArch: noarch
|
||||
BuildSystem: pyproject
|
||||
|
||||
BuildOption(install): %{pypi_name}
|
||||
# Skip submodules whose optional dependencies are not packaged yet
|
||||
BuildOption(check): -e 'pydantic_extra_types.cron'
|
||||
BuildOption(check): -e 'pydantic_extra_types.mongo_object_id'
|
||||
BuildOption(check): -e 'pydantic_extra_types.pendulum_dt'
|
||||
BuildOption(check): -e 'pydantic_extra_types.phone_numbers'
|
||||
BuildOption(check): -e 'pydantic_extra_types.semantic_version'
|
||||
BuildOption(check): -e 'pydantic_extra_types.semver'
|
||||
BuildOption(check): -e 'pydantic_extra_types.ulid'
|
||||
|
||||
BuildRequires: pyproject-rpm-macros
|
||||
BuildRequires: pkgconfig(python3)
|
||||
BuildRequires: python3dist(hatchling)
|
||||
BuildRequires: python3dist(pip)
|
||||
BuildRequires: python3dist(pycountry)
|
||||
BuildRequires: python3dist(setuptools)
|
||||
|
||||
Provides: python3-%{srcname} = %{version}-%{release}
|
||||
%python_provide python3-%{srcname}
|
||||
|
||||
%description
|
||||
Extra Pydantic types provides a collection of additional field types and
|
||||
validators for Pydantic, such as country codes, phone numbers, colors,
|
||||
coordinates and currency codes.
|
||||
|
||||
%generate_buildrequires
|
||||
%pyproject_buildrequires
|
||||
|
||||
%files -f %{pyproject_files}
|
||||
%doc README.md
|
||||
%license LICENSE
|
||||
|
||||
%changelog
|
||||
%autochangelog
|
||||
@@ -1,55 +0,0 @@
|
||||
From 353550790f659b320ea8753b7d4a6fd701bd1a79 Mon Sep 17 00:00:00 2001
|
||||
From: Sakura286 <chenxuan@iscas.ac.cn>
|
||||
Date: Fri, 6 Mar 2026 16:36:36 +0800
|
||||
Subject: [PATCH 1/6] fix python shebang
|
||||
|
||||
---
|
||||
Tensile/Configs/miopen/convert_cfg.py | 2 +-
|
||||
Tensile/Tests/create_tests.py | 2 +-
|
||||
Tensile/bin/Tensile | 2 +-
|
||||
Tensile/bin/TensileCreateLibrary | 2 +-
|
||||
4 files changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/Tensile/Configs/miopen/convert_cfg.py b/Tensile/Configs/miopen/convert_cfg.py
|
||||
index c62d26f..3b5c114 100644
|
||||
--- a/Tensile/Configs/miopen/convert_cfg.py
|
||||
+++ b/Tensile/Configs/miopen/convert_cfg.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/python
|
||||
+#!/usr/bin/python3
|
||||
|
||||
################################################################################
|
||||
#
|
||||
diff --git a/Tensile/Tests/create_tests.py b/Tensile/Tests/create_tests.py
|
||||
index 2b08e3f..94f7345 100755
|
||||
--- a/Tensile/Tests/create_tests.py
|
||||
+++ b/Tensile/Tests/create_tests.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/python
|
||||
+#!/usr/bin/python3
|
||||
|
||||
################################################################################
|
||||
#
|
||||
diff --git a/Tensile/bin/Tensile b/Tensile/bin/Tensile
|
||||
index 1c53682..2ac7d57 100755
|
||||
--- a/Tensile/bin/Tensile
|
||||
+++ b/Tensile/bin/Tensile
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/env python3
|
||||
+#!/usr/bin/python3
|
||||
|
||||
################################################################################
|
||||
#
|
||||
diff --git a/Tensile/bin/TensileCreateLibrary b/Tensile/bin/TensileCreateLibrary
|
||||
index e90be28..8e966c3 100755
|
||||
--- a/Tensile/bin/TensileCreateLibrary
|
||||
+++ b/Tensile/bin/TensileCreateLibrary
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/env python3
|
||||
+#!/usr/bin/python3
|
||||
|
||||
################################################################################
|
||||
#
|
||||
--
|
||||
2.51.0
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
From 9d2c031ba924572914b72794f94f1def07aa225c Mon Sep 17 00:00:00 2001
|
||||
From: Sakura286 <chenxuan@iscas.ac.cn>
|
||||
Date: Fri, 6 Mar 2026 16:37:40 +0800
|
||||
Subject: [PATCH 2/6] fix tensile get path
|
||||
|
||||
---
|
||||
Tensile/cmake/TensileConfig.cmake | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/Tensile/cmake/TensileConfig.cmake b/Tensile/cmake/TensileConfig.cmake
|
||||
index 62682d7..de275b0 100644
|
||||
--- a/Tensile/cmake/TensileConfig.cmake
|
||||
+++ b/Tensile/cmake/TensileConfig.cmake
|
||||
@@ -45,7 +45,7 @@ if(NOT DEFINED Tensile_ROOT)
|
||||
if (WIN32)
|
||||
execute_process(COMMAND "${Tensile_PREFIX}/bin/TensileGetPath.exe" OUTPUT_VARIABLE Tensile_ROOT)
|
||||
else()
|
||||
- execute_process(COMMAND "${Tensile_PREFIX}/bin/TensileGetPath" OUTPUT_VARIABLE Tensile_ROOT)
|
||||
+ execute_process(COMMAND "TensileGetPath" OUTPUT_VARIABLE Tensile_ROOT)
|
||||
endif()
|
||||
endif()
|
||||
list(APPEND CMAKE_MODULE_PATH "${Tensile_ROOT}/Source/cmake/")
|
||||
--
|
||||
2.51.0
|
||||
|
||||
@@ -1,34 +0,0 @@
|
||||
From b1a90000e009daf6c91dc9e0837a36d9f4735a34 Mon Sep 17 00:00:00 2001
|
||||
From: Sakura286 <chenxuan@iscas.ac.cn>
|
||||
Date: Fri, 6 Mar 2026 16:39:22 +0800
|
||||
Subject: [PATCH 3/6] reduce requirements
|
||||
|
||||
---
|
||||
docs/sphinx/requirements.in | 1 -
|
||||
docs/sphinx/requirements.txt | 2 --
|
||||
2 files changed, 3 deletions(-)
|
||||
|
||||
diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in
|
||||
index 4184a90..f818da0 100644
|
||||
--- a/docs/sphinx/requirements.in
|
||||
+++ b/docs/sphinx/requirements.in
|
||||
@@ -1,3 +1,2 @@
|
||||
rocm-docs-core==1.20.0
|
||||
autodoc
|
||||
-joblib # Required dependency for API doc-string generation
|
||||
diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt
|
||||
index e9b7e28..2dd28d9 100644
|
||||
--- a/docs/sphinx/requirements.txt
|
||||
+++ b/docs/sphinx/requirements.txt
|
||||
@@ -91,8 +91,6 @@ jinja2==3.1.4
|
||||
# via
|
||||
# myst-parser
|
||||
# sphinx
|
||||
-joblib==1.5.1
|
||||
- # via -r requirements.in
|
||||
jsonschema==4.23.0
|
||||
# via nbformat
|
||||
jsonschema-specifications==2024.10.1
|
||||
--
|
||||
2.51.0
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
From 5e9b360710400fcace517f055edf54da2ff0e076 Mon Sep 17 00:00:00 2001
|
||||
From: Sakura286 <chenxuan@iscas.ac.cn>
|
||||
Date: Fri, 6 Mar 2026 16:41:52 +0800
|
||||
Subject: [PATCH 4/6] ignore asm cap cache
|
||||
|
||||
---
|
||||
Tensile/Common.py | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/Tensile/Common.py b/Tensile/Common.py
|
||||
index 86c6c57..376afc4 100644
|
||||
--- a/Tensile/Common.py
|
||||
+++ b/Tensile/Common.py
|
||||
@@ -307,7 +307,7 @@ globalParameters["SeparateArchitectures"] = False # write Tensile library metada
|
||||
|
||||
globalParameters["LazyLibraryLoading"] = False # Load library and code object files when needed instead of at startup
|
||||
|
||||
-globalParameters["IgnoreAsmCapCache"] = False # Ignore checking for discrepancies between derived and cached asm caps
|
||||
+globalParameters["IgnoreAsmCapCache"] = True # Ignore checking for discrepancies between derived and cached asm caps
|
||||
|
||||
globalParameters["ExperimentalLogicDir"] = "/experimental/"
|
||||
|
||||
--
|
||||
2.51.0
|
||||
|
||||
@@ -1,71 +0,0 @@
|
||||
From 746af14c11ee1b455f1adbab8bf6bc3c93fb3fde Mon Sep 17 00:00:00 2001
|
||||
From: Sakura286 <chenxuan@iscas.ac.cn>
|
||||
Date: Fri, 6 Mar 2026 16:45:10 +0800
|
||||
Subject: [PATCH 5/6] no amdclang when rocm-llvm is unbundled
|
||||
|
||||
---
|
||||
Tensile/Common.py | 4 ++--
|
||||
Tensile/Utilities/Toolchain.py | 10 +++++-----
|
||||
2 files changed, 7 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/Tensile/Common.py b/Tensile/Common.py
|
||||
index 376afc4..90c579e 100644
|
||||
--- a/Tensile/Common.py
|
||||
+++ b/Tensile/Common.py
|
||||
@@ -269,7 +269,7 @@ globalParameters["DictLibraryLogic"] = False
|
||||
globalParameters["CurrentISA"] = (0,0,0)
|
||||
globalParameters["ROCmAgentEnumeratorPath"] = None # /opt/rocm/bin/rocm_agent_enumerator
|
||||
globalParameters["ROCmSMIPath"] = None # /opt/rocm/bin/rocm-smi
|
||||
-globalParameters["AssemblerPath"] = None # /opt/rocm/llvm/bin/clang++
|
||||
+globalParameters["AssemblerPath"] = "clang++" # /opt/rocm/llvm/bin/clang++
|
||||
globalParameters["WorkingPath"] = os.getcwd() # path where tensile called from
|
||||
globalParameters["IndexChars"] = "IJKLMNOPQRSTUVWXYZ" # which characters to use for C[ij]=Sum[k] A[ik]*B[jk]
|
||||
globalParameters["ScriptPath"] = os.path.dirname(os.path.realpath(__file__)) # path to Tensile/Tensile.py
|
||||
@@ -279,7 +279,7 @@ globalParameters["HipClangVersion"] = "0.0.0"
|
||||
globalParameters["RuntimeLanguage"] = "HIP"
|
||||
|
||||
globalParameters["CodeObjectVersion"] = "default"
|
||||
-globalParameters["CxxCompiler"] = "amdclang++" if os.name != "nt" else "clang++"
|
||||
+globalParameters["CxxCompiler"] = "hipcc" if os.name != "nt" else "clang++"
|
||||
globalParameters["CCompiler"] = "amdclang" if os.name != "nt" else "clang"
|
||||
globalParameters["Architecture"] = "all"
|
||||
|
||||
diff --git a/Tensile/Utilities/Toolchain.py b/Tensile/Utilities/Toolchain.py
|
||||
index ee9cbee..e3de82b 100644
|
||||
--- a/Tensile/Utilities/Toolchain.py
|
||||
+++ b/Tensile/Utilities/Toolchain.py
|
||||
@@ -106,10 +106,10 @@ def _posixSearchPaths() -> List[Path]:
|
||||
|
||||
|
||||
class ToolchainDefaults(NamedTuple):
|
||||
- CXX_COMPILER = osSelect(linux="amdclang++", windows="clang++.exe")
|
||||
- C_COMPILER = osSelect(linux="amdclang", windows="clang.exe")
|
||||
+ CXX_COMPILER = osSelect(linux="hipcc", windows="clang++.exe")
|
||||
+ C_COMPILER = osSelect(linux="clang", windows="clang.exe")
|
||||
OFFLOAD_BUNDLER = osSelect(linux="clang-offload-bundler", windows="clang-offload-bundler.exe")
|
||||
- ASSEMBLER = osSelect(linux="amdclang++", windows="clang++.exe")
|
||||
+ ASSEMBLER = osSelect(linux="clang++", windows="clang++.exe")
|
||||
HIP_CONFIG = osSelect(linux="hipconfig", windows="hipconfig")
|
||||
DEVICE_ENUMERATOR = osSelect(linux="rocm_agent_enumerator", windows="hipinfo.exe")
|
||||
|
||||
@@ -132,7 +132,7 @@ def supportedCCompiler(compiler: str) -> bool:
|
||||
Return:
|
||||
If supported True; otherwise, False.
|
||||
"""
|
||||
- return _supportedComponent(compiler, ["amdclang", "clang", "hipcc"])
|
||||
+ return _supportedComponent(compiler, ["clang", "clang", "hipcc"])
|
||||
|
||||
|
||||
def supportedCxxCompiler(compiler: str) -> bool:
|
||||
@@ -144,7 +144,7 @@ def supportedCxxCompiler(compiler: str) -> bool:
|
||||
Return:
|
||||
If supported True; otherwise, False.
|
||||
"""
|
||||
- return _supportedComponent(compiler, ["amdclang++", "clang++", "hipcc"])
|
||||
+ return _supportedComponent(compiler, ["clang++", "clang++", "hipcc"])
|
||||
|
||||
|
||||
def supportedOffloadBundler(bundler: str) -> bool:
|
||||
--
|
||||
2.51.0
|
||||
|
||||
@@ -1,128 +0,0 @@
|
||||
From dfefd5482684998206290e2e62dc0c84dcc7d64e Mon Sep 17 00:00:00 2001
|
||||
From: Sakura286 <chenxuan@iscas.ac.cn>
|
||||
Date: Fri, 6 Mar 2026 16:49:43 +0800
|
||||
Subject: [PATCH 6/6] use system path instead of default
|
||||
|
||||
---
|
||||
Tensile/Common.py | 2 +-
|
||||
Tensile/Source/CMakeLists.txt | 4 ++--
|
||||
Tensile/Source/FindHIP.cmake | 4 ++--
|
||||
Tensile/Source/cmake/FindROCmSMI.cmake | 2 +-
|
||||
Tensile/Tests/hipModuleLoad_timing/Makefile | 6 +++---
|
||||
Tensile/Utilities/Toolchain.py | 5 ++---
|
||||
6 files changed, 11 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/Tensile/Common.py b/Tensile/Common.py
|
||||
index 90c579e..3589e01 100644
|
||||
--- a/Tensile/Common.py
|
||||
+++ b/Tensile/Common.py
|
||||
@@ -2415,7 +2415,7 @@ def assignGlobalParameters( config, capabilitiesCache: Optional[dict] = None ):
|
||||
if "KeepBuildTmp" in config:
|
||||
globalParameters["KeepBuildTmp"] = config["KeepBuildTmp"]
|
||||
|
||||
- globalParameters["ROCmPath"] = "/opt/rocm"
|
||||
+ globalParameters["ROCmPath"] = "/usr"
|
||||
if "ROCM_PATH" in os.environ:
|
||||
globalParameters["ROCmPath"] = os.environ.get("ROCM_PATH")
|
||||
if "TENSILE_ROCM_PATH" in os.environ:
|
||||
diff --git a/Tensile/Source/CMakeLists.txt b/Tensile/Source/CMakeLists.txt
|
||||
index b96e308..c756756 100644
|
||||
--- a/Tensile/Source/CMakeLists.txt
|
||||
+++ b/Tensile/Source/CMakeLists.txt
|
||||
@@ -26,7 +26,7 @@ cmake_minimum_required(VERSION 3.13)
|
||||
|
||||
# Override all paths arguments as they do not work properly
|
||||
file(TO_CMAKE_PATH "$ENV{ROCM_PATH}" ROCM_PATH_ENV_VALUE)
|
||||
-list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH_ENV_VALUE} /opt/rocm)
|
||||
+list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH_ENV_VALUE} /usr)
|
||||
|
||||
project(Tensile)
|
||||
|
||||
@@ -65,7 +65,7 @@ CMAKE_DEPENDENT_OPTION(TENSILE_BUILD_CLIENT "Build the benchmarking client" ON
|
||||
"TENSILE_USE_HIP" OFF)
|
||||
|
||||
if(TENSILE_USE_HIP)
|
||||
- find_package(HIP REQUIRED CONFIG PATHS ${ROCM_PATH_ENV_VALUE} /opt/rocm)
|
||||
+ find_package(HIP REQUIRED CONFIG PATHS ${ROCM_PATH_ENV_VALUE} /usr)
|
||||
endif()
|
||||
|
||||
if(TENSILE_USE_OPENMP)
|
||||
diff --git a/Tensile/Source/FindHIP.cmake b/Tensile/Source/FindHIP.cmake
|
||||
index d299357..ba8597f 100644
|
||||
--- a/Tensile/Source/FindHIP.cmake
|
||||
+++ b/Tensile/Source/FindHIP.cmake
|
||||
@@ -79,7 +79,7 @@ else()
|
||||
hip/hip_runtime.h
|
||||
PATHS
|
||||
ENV HIP_PATH
|
||||
- /opt/rocm
|
||||
+ /usr
|
||||
PATH_SUFFIXES
|
||||
/include/hip
|
||||
/include
|
||||
@@ -98,7 +98,7 @@ else()
|
||||
NAMES hipcc
|
||||
PATHS
|
||||
ENV HIP_PATH
|
||||
- /opt/rocm
|
||||
+ /usr
|
||||
PATH_SUFFIXES
|
||||
/bin
|
||||
)
|
||||
diff --git a/Tensile/Source/cmake/FindROCmSMI.cmake b/Tensile/Source/cmake/FindROCmSMI.cmake
|
||||
index 0498766..071232a 100644
|
||||
--- a/Tensile/Source/cmake/FindROCmSMI.cmake
|
||||
+++ b/Tensile/Source/cmake/FindROCmSMI.cmake
|
||||
@@ -24,7 +24,7 @@
|
||||
|
||||
if(NOT ROCM_ROOT)
|
||||
if(NOT ROCM_DIR)
|
||||
- set(ROCM_ROOT "/opt/rocm")
|
||||
+ set(ROCM_ROOT "/usr")
|
||||
else()
|
||||
set(ROCM_DIR "${ROCM_DIR}/../../..")
|
||||
endif()
|
||||
diff --git a/Tensile/Tests/hipModuleLoad_timing/Makefile b/Tensile/Tests/hipModuleLoad_timing/Makefile
|
||||
index 671167d..2177143 100644
|
||||
--- a/Tensile/Tests/hipModuleLoad_timing/Makefile
|
||||
+++ b/Tensile/Tests/hipModuleLoad_timing/Makefile
|
||||
@@ -22,10 +22,10 @@
|
||||
#
|
||||
################################################################################
|
||||
|
||||
-CXX?=/opt/rocm/hip/bin/amdclang++
|
||||
-LIBFLAGS=-L/opt/rocm/hip/lib/
|
||||
+CXX?=/usr/bin/amdclang++
|
||||
+LIBFLAGS=-L/usr/lib64/
|
||||
LIBS=-lamdhip64
|
||||
-INCFLAGS=-I/opt/rocm/hip/include/
|
||||
+INCFLAGS=-I/usr/include/
|
||||
|
||||
hipModuleLoadTiming.out: hipModuleLoadTiming.o
|
||||
$(CXX) -o $@ $(LIBFLAGS) $^
|
||||
diff --git a/Tensile/Utilities/Toolchain.py b/Tensile/Utilities/Toolchain.py
|
||||
index e3de82b..e6ee7f3 100644
|
||||
--- a/Tensile/Utilities/Toolchain.py
|
||||
+++ b/Tensile/Utilities/Toolchain.py
|
||||
@@ -29,8 +29,8 @@ from subprocess import PIPE, run
|
||||
from typing import List, NamedTuple, Union
|
||||
from warnings import warn
|
||||
|
||||
-DEFAULT_ROCM_BIN_PATH_POSIX = Path("/opt/rocm/bin")
|
||||
-DEFAULT_ROCM_LLVM_BIN_PATH_POSIX = Path("/opt/rocm/lib/llvm/bin")
|
||||
+DEFAULT_ROCM_BIN_PATH_POSIX = Path("/usr/bin")
|
||||
+DEFAULT_ROCM_LLVM_BIN_PATH_POSIX = Path("/usr/bin")
|
||||
DEFAULT_ROCM_BIN_PATH_WINDOWS = Path("C:/Program Files/AMD/ROCm")
|
||||
|
||||
|
||||
@@ -89,7 +89,6 @@ def _posixSearchPaths() -> List[Path]:
|
||||
if os.environ.get("ROCM_PATH"):
|
||||
for p in os.environ["ROCM_PATH"].split(os.pathsep):
|
||||
searchPaths.append(Path(p) / "bin")
|
||||
- searchPaths.append(Path(p) / "lib" / "llvm" / "bin")
|
||||
|
||||
searchPaths.extend(
|
||||
[
|
||||
--
|
||||
2.51.0
|
||||
|
||||
@@ -1,91 +0,0 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%global srcname tensile
|
||||
%global upstreamname Tensile
|
||||
%global rocm_version 7.1.1
|
||||
|
||||
Name: python-%{srcname}
|
||||
Version: %{rocm_version}
|
||||
Release: %autorelease
|
||||
Summary: Tool for creating benchmark-driven backend libraries for GEMMs
|
||||
License: MIT
|
||||
URL: https://github.com/ROCm/Tensile
|
||||
#!RemoteAsset
|
||||
Source0: %{url}/archive/rocm-%{rocm_version}.tar.gz
|
||||
BuildSystem: pyproject
|
||||
|
||||
BuildOption(install): -l %{upstreamname}
|
||||
|
||||
BuildRequires: python3-devel
|
||||
|
||||
Requires: cmake-filesystem
|
||||
Requires: hipcc
|
||||
Requires: rocminfo
|
||||
Requires: python3dist(msgpack)
|
||||
Requires: python3dist(pyyaml)
|
||||
|
||||
Provides: python3-%{srcname}
|
||||
%python_provide python3-%{srcname}
|
||||
|
||||
%patchlist
|
||||
0001-fix-python-shebang.patch
|
||||
0002-fix-tensile-get-path.patch
|
||||
# TODO: joblib is not enabled on openRuyi
|
||||
0003-reduce-requirements.patch
|
||||
0004-ignore-asm-cap-cache.patch
|
||||
# no bundled clang is used on openRuyi
|
||||
0005-no-amdclang-when-rocm-llvm-is-unbundled.patch
|
||||
# /opt is not used on openRuyi packaging
|
||||
0006-use-system-path-instead-of-default.patch
|
||||
|
||||
%description
|
||||
Tensile is a tool for creating benchmark-driven backend libraries for GEMMs,
|
||||
GEMM-like problems (such as batched GEMM), and general N-dimensional tensor
|
||||
contractions on a GPU. The Tensile library is mainly used as backend library to
|
||||
rocBLAS. Tensile acts as the performance backbone for a wide variety of
|
||||
'compute' applications running on AMD GPUs.
|
||||
|
||||
%prep -a
|
||||
#Fix a few things:
|
||||
chmod 755 Tensile/Configs/miopen/convert_cfg.py
|
||||
|
||||
%generate_buildrequires
|
||||
%pyproject_buildrequires
|
||||
|
||||
%install -a
|
||||
# /usr/cmake/* -> /usr/share/cmake/Tensile
|
||||
mkdir -p %{buildroot}%{_datadir}/cmake/Tensile
|
||||
mv %{buildroot}%{_prefix}/cmake/* %{buildroot}%{_datadir}/cmake/Tensile/
|
||||
rm -rf %{buildroot}%{_prefix}/cmake
|
||||
|
||||
# Do not distribute broken bins
|
||||
rm %{buildroot}%{_bindir}/tensile*
|
||||
|
||||
# rm hard links and replace
|
||||
rm %{buildroot}%{python3_sitelib}/%{upstreamname}/cmake/*.cmake
|
||||
mv %{buildroot}%{_datadir}/cmake/Tensile/*.cmake %{buildroot}%{python3_sitelib}/%{upstreamname}/cmake/
|
||||
|
||||
%pyproject_save_files %{upstreamname}
|
||||
|
||||
%check
|
||||
# 1. tensile requires GPU hardware at runtime
|
||||
# 2. optional dependencies (joblib) are intentionally excluded
|
||||
|
||||
%files -f %{pyproject_files}
|
||||
%doc README.md
|
||||
%license LICENSE.md
|
||||
# Do not distribute tests
|
||||
%exclude %{python3_sitelib}/%{upstreamname}/Tests
|
||||
%{_bindir}/Tensile
|
||||
%{_bindir}/TensileBenchmarkCluster
|
||||
%{_bindir}/TensileCreateLibrary
|
||||
%{_bindir}/TensileGetPath
|
||||
%{_bindir}/TensileRetuneLibrary
|
||||
|
||||
%changelog
|
||||
%{?autochangelog}
|
||||
@@ -0,0 +1,45 @@
|
||||
--- a/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp
|
||||
+++ b/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp
|
||||
@@ -53,11 +53,11 @@
|
||||
} initializer;
|
||||
} // namespace (anonymous)
|
||||
|
||||
-#define AT_MAGMA_VERSION MAGMA_VERSION_MAJOR*100 + MAGMA_VERSION_MINOR*10 + MAGMA_VERSION_MICRO
|
||||
+#define AT_MAGMA_VERSION MAGMA_VERSION_MAJOR*10000 + MAGMA_VERSION_MINOR*100 + MAGMA_VERSION_MICRO
|
||||
|
||||
-// Check that MAGMA never releases MAGMA_VERSION_MINOR >= 10 or MAGMA_VERSION_MICRO >= 10
|
||||
-#if MAGMA_VERSION_MINOR >= 10 || MAGMA_VERSION_MICRO >= 10
|
||||
-#error "MAGMA release minor or micro version >= 10, please correct AT_MAGMA_VERSION"
|
||||
+// Check that MAGMA never releases MAGMA_VERSION_MINOR >= 100 or MAGMA_VERSION_MICRO >= 100
|
||||
+#if MAGMA_VERSION_MINOR >= 100 || MAGMA_VERSION_MICRO >= 100
|
||||
+#error "MAGMA release minor or micro version >= 100, please correct AT_MAGMA_VERSION"
|
||||
#endif
|
||||
|
||||
#else
|
||||
@@ -153,7 +153,7 @@
|
||||
scalar_t** dB_array, magma_int_t lddb, magma_int_t& info,
|
||||
magma_int_t batchsize, const MAGMAQueue& magma_queue, magma_trans_t trans);
|
||||
|
||||
-#if AT_MAGMA_VERSION >= 254
|
||||
+#if AT_MAGMA_VERSION >= 20504
|
||||
|
||||
template <>
|
||||
void magmaLdlHermitian<double>(
|
||||
@@ -209,7 +209,7 @@
|
||||
AT_CUDA_CHECK(cudaGetLastError());
|
||||
}
|
||||
|
||||
-#endif // AT_MAGMA_VERSION >= 254
|
||||
+#endif // AT_MAGMA_VERSION >= 20504
|
||||
|
||||
template<>
|
||||
void magmaLu<double>(
|
||||
@@ -818,7 +818,7 @@
|
||||
// If cusolver and magma 2.5.4+ are both available and hermitian=true,
|
||||
// call magma for complex inputs
|
||||
#ifdef USE_LINALG_SOLVER
|
||||
-#if AT_MAGMA_ENABLED() && (AT_MAGMA_VERSION >= 254)
|
||||
+#if AT_MAGMA_ENABLED() && (AT_MAGMA_VERSION >= 20504)
|
||||
if (LD.is_complex() && hermitian) {
|
||||
return ldl_factor_magma(
|
||||
LD, pivots, info, upper, hermitian);
|
||||
@@ -0,0 +1,350 @@
|
||||
#
|
||||
# License Details
|
||||
# Main license BSD 3-Clause
|
||||
#
|
||||
# Apache-2.0
|
||||
# android/libs/fbjni/LICENSE
|
||||
# android/libs/fbjni/CMakeLists.txt
|
||||
# android/libs/fbjni/build.gradle
|
||||
# android/libs/fbjni/cxx/fbjni/ByteBuffer.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/ByteBuffer.h
|
||||
# android/libs/fbjni/cxx/fbjni/Context.h
|
||||
# android/libs/fbjni/cxx/fbjni/File.h
|
||||
# android/libs/fbjni/cxx/fbjni/JThread.h
|
||||
# android/libs/fbjni/cxx/fbjni/NativeRunnable.h
|
||||
# android/libs/fbjni/cxx/fbjni/OnLoad.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/ReadableByteChannel.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/ReadableByteChannel.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Boxed.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Common.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/CoreClasses-inl.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/CoreClasses.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Environment.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Environment.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Exceptions.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Exceptions.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/FbjniApi.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Hybrid.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Hybrid.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Iterator-inl.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Iterator.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/JWeakReference.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Log.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Meta-forward.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Meta-inl.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Meta.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Meta.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/MetaConvert.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/ReferenceAllocators-inl.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/ReferenceAllocators.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/References-forward.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/References-inl.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/References.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/detail/References.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Registration-inl.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/Registration.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/SimpleFixedString.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/TypeTraits.h
|
||||
# android/libs/fbjni/cxx/fbjni/detail/utf8.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/detail/utf8.h
|
||||
# android/libs/fbjni/cxx/fbjni/fbjni.cpp
|
||||
# android/libs/fbjni/cxx/fbjni/fbjni.h
|
||||
# android/libs/fbjni/cxx/lyra/cxa_throw.cpp
|
||||
# android/libs/fbjni/cxx/lyra/lyra.cpp
|
||||
# android/libs/fbjni/cxx/lyra/lyra.h
|
||||
# android/libs/fbjni/cxx/lyra/lyra_breakpad.cpp
|
||||
# android/libs/fbjni/cxx/lyra/lyra_exceptions.cpp
|
||||
# android/libs/fbjni/cxx/lyra/lyra_exceptions.h
|
||||
# android/libs/fbjni/gradle.properties
|
||||
# android/libs/fbjni/gradle/android-tasks.gradle
|
||||
# android/libs/fbjni/gradle/release.gradle
|
||||
# android/libs/fbjni/gradlew
|
||||
# android/libs/fbjni/gradlew.bat
|
||||
# android/libs/fbjni/host.gradle
|
||||
# android/libs/fbjni/java/com/facebook/jni/CppException.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/CppSystemErrorException.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/DestructorThread.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/HybridClassBase.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/HybridData.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/IteratorHelper.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/MapIteratorHelper.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/NativeRunnable.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/ThreadScopeSupport.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/UnknownCppException.java
|
||||
# android/libs/fbjni/java/com/facebook/jni/annotations/DoNotStrip.java
|
||||
# android/libs/fbjni/scripts/android-setup.sh
|
||||
# android/libs/fbjni/scripts/run-host-tests.sh
|
||||
# android/libs/fbjni/settings.gradle
|
||||
# android/libs/fbjni/test/BaseFBJniTests.java
|
||||
# android/libs/fbjni/test/ByteBufferTests.java
|
||||
# android/libs/fbjni/test/DocTests.java
|
||||
# android/libs/fbjni/test/FBJniTests.java
|
||||
# android/libs/fbjni/test/HybridTests.java
|
||||
# android/libs/fbjni/test/IteratorTests.java
|
||||
# android/libs/fbjni/test/PrimitiveArrayTests.java
|
||||
# android/libs/fbjni/test/ReadableByteChannelTests.java
|
||||
# android/libs/fbjni/test/jni/CMakeLists.txt
|
||||
# android/libs/fbjni/test/jni/byte_buffer_tests.cpp
|
||||
# android/libs/fbjni/test/jni/doc_tests.cpp
|
||||
# android/libs/fbjni/test/jni/expect.h
|
||||
# android/libs/fbjni/test/jni/fbjni_onload.cpp
|
||||
# android/libs/fbjni/test/jni/fbjni_tests.cpp
|
||||
# android/libs/fbjni/test/jni/hybrid_tests.cpp
|
||||
# android/libs/fbjni/test/jni/inter_dso_exception_test_1/Test.cpp
|
||||
# android/libs/fbjni/test/jni/inter_dso_exception_test_1/Test.h
|
||||
# android/libs/fbjni/test/jni/inter_dso_exception_test_2/Test.cpp
|
||||
# android/libs/fbjni/test/jni/inter_dso_exception_test_2/Test.h
|
||||
# android/libs/fbjni/test/jni/iterator_tests.cpp
|
||||
# android/libs/fbjni/test/jni/modified_utf8_test.cpp
|
||||
# android/libs/fbjni/test/jni/no_rtti.cpp
|
||||
# android/libs/fbjni/test/jni/no_rtti.h
|
||||
# android/libs/fbjni/test/jni/primitive_array_tests.cpp
|
||||
# android/libs/fbjni/test/jni/readable_byte_channel_tests.cpp
|
||||
# android/libs/fbjni/test/jni/simple_fixed_string_tests.cpp
|
||||
# android/libs/fbjni/test/jni/utf16toUTF8_test.cpp
|
||||
# android/pytorch_android/host/build.gradle
|
||||
# aten/src/ATen/cuda/llvm_basic.cpp
|
||||
# aten/src/ATen/cuda/llvm_complex.cpp
|
||||
# aten/src/ATen/native/quantized/cpu/qnnpack/confu.yaml
|
||||
# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-neon.c
|
||||
# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-scalar.h
|
||||
# aten/src/ATen/native/quantized/cpu/qnnpack/src/requantization/gemmlowp-sse.h
|
||||
# aten/src/ATen/nnapi/codegen.py
|
||||
# aten/src/ATen/nnapi/NeuralNetworks.h
|
||||
# aten/src/ATen/nnapi/nnapi_wrapper.cpp
|
||||
# aten/src/ATen/nnapi/nnapi_wrapper.h
|
||||
# binaries/benchmark_args.h
|
||||
# binaries/benchmark_helper.cc
|
||||
# binaries/benchmark_helper.h
|
||||
# binaries/compare_models_torch.cc
|
||||
# binaries/convert_and_benchmark.cc
|
||||
# binaries/convert_caffe_image_db.cc
|
||||
# binaries/convert_db.cc
|
||||
# binaries/convert_encoded_to_raw_leveldb.cc
|
||||
# binaries/convert_image_to_tensor.cc
|
||||
# binaries/core_overhead_benchmark.cc
|
||||
# binaries/core_overhead_benchmark_gpu.cc
|
||||
# binaries/db_throughput.cc
|
||||
# binaries/dump_operator_names.cc
|
||||
# binaries/inspect_gpu.cc
|
||||
# binaries/load_benchmark_torch.cc
|
||||
# binaries/make_cifar_db.cc
|
||||
# binaries/make_image_db.cc
|
||||
# binaries/make_mnist_db.cc
|
||||
# binaries/optimize_for_mobile.cc
|
||||
# binaries/parallel_info.cc
|
||||
# binaries/predictor_verifier.cc
|
||||
# binaries/print_core_object_sizes_gpu.cc
|
||||
# binaries/print_registered_core_operators.cc
|
||||
# binaries/run_plan.cc
|
||||
# binaries/run_plan_mpi.cc
|
||||
# binaries/speed_benchmark.cc
|
||||
# binaries/speed_benchmark_torch.cc
|
||||
# binaries/split_db.cc
|
||||
# binaries/tsv_2_proto.cc
|
||||
# binaries/tutorial_blob.cc
|
||||
# binaries/zmq_feeder.cc
|
||||
# c10/test/util/small_vector_test.cpp
|
||||
# c10/util/FunctionRef.h
|
||||
# c10/util/SmallVector.cpp
|
||||
# c10/util/SmallVector.h
|
||||
# c10/util/llvmMathExtras.h
|
||||
# c10/util/sparse_bitset.h
|
||||
# caffe2/contrib/aten/gen_op.py
|
||||
# caffe2/contrib/fakelowp/fp16_fc_acc_op.cc
|
||||
# caffe2/contrib/fakelowp/fp16_fc_acc_op.h
|
||||
# caffe2/contrib/gloo/allgather_ops.cc
|
||||
# caffe2/contrib/gloo/allgather_ops.h
|
||||
# caffe2/contrib/gloo/reduce_scatter_ops.cc
|
||||
# caffe2/contrib/gloo/reduce_scatter_ops.h
|
||||
# caffe2/core/hip/common_miopen.h
|
||||
# caffe2/core/hip/common_miopen.hip
|
||||
# caffe2/core/net_async_tracing.cc
|
||||
# caffe2/core/net_async_tracing.h
|
||||
# caffe2/core/net_async_tracing_test.cc
|
||||
# caffe2/experiments/operators/fully_connected_op_decomposition.cc
|
||||
# caffe2/experiments/operators/fully_connected_op_decomposition.h
|
||||
# caffe2/experiments/operators/fully_connected_op_decomposition_gpu.cc
|
||||
# caffe2/experiments/operators/fully_connected_op_prune.cc
|
||||
# caffe2/experiments/operators/fully_connected_op_prune.h
|
||||
# caffe2/experiments/operators/fully_connected_op_sparse.cc
|
||||
# caffe2/experiments/operators/fully_connected_op_sparse.h
|
||||
# caffe2/experiments/operators/funhash_op.cc
|
||||
# caffe2/experiments/operators/funhash_op.h
|
||||
# caffe2/experiments/operators/sparse_funhash_op.cc
|
||||
# caffe2/experiments/operators/sparse_funhash_op.h
|
||||
# caffe2/experiments/operators/sparse_matrix_reshape_op.cc
|
||||
# caffe2/experiments/operators/sparse_matrix_reshape_op.h
|
||||
# caffe2/experiments/operators/tt_contraction_op.cc
|
||||
# caffe2/experiments/operators/tt_contraction_op.h
|
||||
# caffe2/experiments/operators/tt_contraction_op_gpu.cc
|
||||
# caffe2/experiments/operators/tt_pad_op.cc
|
||||
# caffe2/experiments/operators/tt_pad_op.h
|
||||
# caffe2/experiments/python/SparseTransformer.py
|
||||
# caffe2/experiments/python/convnet_benchmarks.py
|
||||
# caffe2/experiments/python/device_reduce_sum_bench.py
|
||||
# caffe2/experiments/python/funhash_op_test.py
|
||||
# caffe2/experiments/python/net_construct_bench.py
|
||||
# caffe2/experiments/python/sparse_funhash_op_test.py
|
||||
# caffe2/experiments/python/sparse_reshape_op_test.py
|
||||
# caffe2/experiments/python/tt_contraction_op_test.py
|
||||
# caffe2/experiments/python/tt_pad_op_test.py
|
||||
# caffe2/mobile/contrib/libvulkan-stub/include/vulkan/vk_platform.h
|
||||
# caffe2/mobile/contrib/libvulkan-stub/include/vulkan/vulkan.h
|
||||
# caffe2/mobile/contrib/nnapi/NeuralNetworks.h
|
||||
# caffe2/mobile/contrib/nnapi/dlnnapi.c
|
||||
# caffe2/mobile/contrib/nnapi/nnapi_benchmark.cc
|
||||
# caffe2/observers/profile_observer.cc
|
||||
# caffe2/observers/profile_observer.h
|
||||
# caffe2/operators/hip/conv_op_miopen.hip
|
||||
# caffe2/operators/hip/local_response_normalization_op_miopen.hip
|
||||
# caffe2/operators/hip/pool_op_miopen.hip
|
||||
# caffe2/operators/hip/spatial_batch_norm_op_miopen.hip
|
||||
# caffe2/operators/quantized/int8_utils.h
|
||||
# caffe2/operators/stump_func_op.cc
|
||||
# caffe2/operators/stump_func_op.cu
|
||||
# caffe2/operators/stump_func_op.h
|
||||
# caffe2/operators/unique_ops.cc
|
||||
# caffe2/operators/unique_ops.cu
|
||||
# caffe2/operators/unique_ops.h
|
||||
# caffe2/operators/upsample_op.cc
|
||||
# caffe2/operators/upsample_op.h
|
||||
# caffe2/opt/fusion.h
|
||||
# caffe2/python/layers/label_smooth.py
|
||||
# caffe2/python/mint/static/css/simple-sidebar.css
|
||||
# caffe2/python/modeling/get_entry_from_blobs.py
|
||||
# caffe2/python/modeling/get_entry_from_blobs_test.py
|
||||
# caffe2/python/modeling/gradient_clipping_test.py
|
||||
# caffe2/python/operator_test/unique_ops_test.py
|
||||
# caffe2/python/operator_test/upsample_op_test.py
|
||||
# caffe2/python/operator_test/weight_scale_test.py
|
||||
# caffe2/python/pybind_state_int8.cc
|
||||
# caffe2/python/transformations.py
|
||||
# caffe2/python/transformations_test.py
|
||||
# caffe2/quantization/server/batch_matmul_dnnlowp_op.cc
|
||||
# caffe2/quantization/server/batch_matmul_dnnlowp_op.h
|
||||
# caffe2/quantization/server/compute_equalization_scale_test.py
|
||||
# caffe2/quantization/server/elementwise_linear_dnnlowp_op.cc
|
||||
# caffe2/quantization/server/elementwise_linear_dnnlowp_op.h
|
||||
# caffe2/quantization/server/elementwise_sum_relu_op.cc
|
||||
# caffe2/quantization/server/fb_fc_packed_op.cc
|
||||
# caffe2/quantization/server/fb_fc_packed_op.h
|
||||
# caffe2/quantization/server/fbgemm_fp16_pack_op.cc
|
||||
# caffe2/quantization/server/fbgemm_fp16_pack_op.h
|
||||
# caffe2/quantization/server/fully_connected_fake_lowp_op.cc
|
||||
# caffe2/quantization/server/fully_connected_fake_lowp_op.h
|
||||
# caffe2/quantization/server/int8_gen_quant_params_min_max_test.py
|
||||
# caffe2/quantization/server/int8_gen_quant_params_test.py
|
||||
# caffe2/quantization/server/int8_quant_scheme_blob_fill_test.py
|
||||
# caffe2/quantization/server/spatial_batch_norm_relu_op.cc
|
||||
# caffe2/sgd/weight_scale_op.cc
|
||||
# caffe2/sgd/weight_scale_op.h
|
||||
# caffe2/utils/bench_utils.h
|
||||
# functorch/examples/maml_omniglot/maml-omniglot-higher.py
|
||||
# functorch/examples/maml_omniglot/maml-omniglot-ptonly.py
|
||||
# functorch/examples/maml_omniglot/maml-omniglot-transforms.py
|
||||
# functorch/examples/maml_omniglot/support/omniglot_loaders.py
|
||||
# modules/detectron/group_spatial_softmax_op.cc
|
||||
# modules/detectron/group_spatial_softmax_op.cu
|
||||
# modules/detectron/group_spatial_softmax_op.h
|
||||
# modules/detectron/ps_roi_pool_op.cc
|
||||
# modules/detectron/ps_roi_pool_op.h
|
||||
# modules/detectron/roi_pool_f_op.cc
|
||||
# modules/detectron/roi_pool_f_op.cu
|
||||
# modules/detectron/roi_pool_f_op.h
|
||||
# modules/detectron/sample_as_op.cc
|
||||
# modules/detectron/sample_as_op.cu
|
||||
# modules/detectron/sample_as_op.h
|
||||
# modules/detectron/select_smooth_l1_loss_op.cc
|
||||
# modules/detectron/select_smooth_l1_loss_op.cu
|
||||
# modules/detectron/select_smooth_l1_loss_op.h
|
||||
# modules/detectron/sigmoid_cross_entropy_loss_op.cc
|
||||
# modules/detectron/sigmoid_cross_entropy_loss_op.cu
|
||||
# modules/detectron/sigmoid_cross_entropy_loss_op.h
|
||||
# modules/detectron/sigmoid_focal_loss_op.cc
|
||||
# modules/detectron/sigmoid_focal_loss_op.cu
|
||||
# modules/detectron/sigmoid_focal_loss_op.h
|
||||
# modules/detectron/smooth_l1_loss_op.cc
|
||||
# modules/detectron/smooth_l1_loss_op.cu
|
||||
# modules/detectron/smooth_l1_loss_op.h
|
||||
# modules/detectron/softmax_focal_loss_op.cc
|
||||
# modules/detectron/softmax_focal_loss_op.cu
|
||||
# modules/detectron/softmax_focal_loss_op.h
|
||||
# modules/detectron/spatial_narrow_as_op.cc
|
||||
# modules/detectron/spatial_narrow_as_op.cu
|
||||
# modules/detectron/spatial_narrow_as_op.h
|
||||
# modules/detectron/upsample_nearest_op.cc
|
||||
# modules/detectron/upsample_nearest_op.h
|
||||
# modules/module_test/module_test_dynamic.cc
|
||||
# modules/rocksdb/rocksdb.cc
|
||||
# scripts/apache_header.txt
|
||||
# scripts/apache_python.txt
|
||||
# torch/distributions/lkj_cholesky.py
|
||||
#
|
||||
# Apache 2.0 AND BSD 2-Clause
|
||||
# caffe2/operators/deform_conv_op.cu
|
||||
#
|
||||
# Apache 2.0 AND BSD 2-Clause AND MIT
|
||||
# modules/detectron/ps_roi_pool_op.cu
|
||||
#
|
||||
# Apache 2.0 AND BSD 2-Clause
|
||||
# modules/detectron/upsample_nearest_op.cu
|
||||
#
|
||||
# BSD 0-Clause
|
||||
# torch/csrc/utils/pythoncapi_compat.h
|
||||
#
|
||||
# BSD 2-Clause
|
||||
# aten/src/ATen/native/quantized/cpu/qnnpack/deps/clog/LICENSE
|
||||
# caffe2/image/transform_gpu.cu
|
||||
# caffe2/image/transform_gpu.h
|
||||
#
|
||||
# BSL-1.0
|
||||
# c10/util/flat_hash_map.h
|
||||
# c10/util/hash.h
|
||||
# c10/util/Optional.h
|
||||
# c10/util/order_preserving_flat_hash_map.h
|
||||
# c10/util/strong_type.h
|
||||
# c10/util/variant.h
|
||||
#
|
||||
# GPL-3.0-or-later AND MIT
|
||||
# c10/util/reverse_iterator.h
|
||||
#
|
||||
# Khronos
|
||||
# These files are for OpenCL, an unused option
|
||||
# Replace them later, as-needed with the opencl-headers.rpm
|
||||
#
|
||||
# caffe2/contrib/opencl/OpenCL/cl.hpp
|
||||
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl.h
|
||||
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl.hpp
|
||||
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_ext.h
|
||||
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_gl.h
|
||||
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_gl_ext.h
|
||||
# caffe2/mobile/contrib/libopencl-stub/include/CL/cl_platform.h
|
||||
# caffe2/mobile/contrib/libopencl-stub/include/CL/opencl.h
|
||||
#
|
||||
# MIT
|
||||
# android/libs/fbjni/googletest-CMakeLists.txt.in
|
||||
# c10/util/BFloat16-math.h
|
||||
# caffe2/mobile/contrib/libvulkan-stub/include/libvulkan-stub.h
|
||||
# caffe2/mobile/contrib/libvulkan-stub/src/libvulkan-stub.c
|
||||
# caffe2/onnx/torch_ops/defs.cc
|
||||
# cmake/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake
|
||||
# cmake/Modules_CUDA_fix/upstream/FindCUDA/parse_cubin.cmake
|
||||
# cmake/Modules_CUDA_fix/upstream/FindCUDA/run_nvcc.cmake
|
||||
# functorch/einops/_parsing.py
|
||||
# test/functorch/test_parsing.py
|
||||
# test/functorch/test_rearrange.py
|
||||
# third_party/miniz-2.1.0/LICENSE
|
||||
# third_party/miniz-2.1.0/miniz.c
|
||||
# tools/coverage_plugins_package/setup.py
|
||||
# torch/_appdirs.py
|
||||
# torch/utils/hipify/hipify_python.py
|
||||
#
|
||||
# Public Domain
|
||||
# caffe2/mobile/contrib/libopencl-stub/LICENSE
|
||||
# caffe2/utils/murmur_hash3.cc
|
||||
# caffe2/utils/murmur_hash3.h
|
||||
#
|
||||
# Zlib
|
||||
# aten/src/ATen/native/cpu/avx_mathfun.h
|
||||
|
||||
@@ -0,0 +1,578 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
#
|
||||
# Originally extracted from Fedora Project
|
||||
# Authors: The Fedora Project Contributors
|
||||
|
||||
%global srcname torch
|
||||
|
||||
%global toolchain clang
|
||||
|
||||
%global pypi_version 2.11.0
|
||||
%global miniz_version 3.0.2
|
||||
|
||||
# For -test subpackage
|
||||
# suitable only for local testing
|
||||
# Install and do something like
|
||||
# export LD_LIBRARY_PATH=/usr/lib64/python3.12/site-packages/torch/lib
|
||||
# /usr/lib64/python3.12/site-packages/torch/bin/test_api, test_lazy
|
||||
%bcond test 0
|
||||
|
||||
%bcond rocm 1
|
||||
|
||||
# For testing distributed+rccl etc.
|
||||
# TODO: openmpi not included in openRuyi
|
||||
%bcond mpi 0
|
||||
|
||||
%global _lto_cflags %nil
|
||||
|
||||
# Disable dwz with rocm because memory can be exhausted
|
||||
%if %{with rocm}
|
||||
%define _find_debuginfo_dwz_opts %{nil}
|
||||
%endif
|
||||
|
||||
# Pytorch third-party buildrequires
|
||||
#
|
||||
# These system_xxx switches are kept for debugging, for the following reasons:
|
||||
#
|
||||
# 1. some packages are not included in openRuyi.
|
||||
# 2. some packages in openRuyi lack a required component.
|
||||
# 3. the required version does not match the one in openRuyi.
|
||||
%bcond system_flatbuffers 0
|
||||
# PyTorch hardcodes httplib to third_party/cpp-httplib
|
||||
%bcond system_httplib 0
|
||||
# TODO: kineto not included in openRuyi
|
||||
%bcond system_kineto 0
|
||||
# TODO: tensorpipe not included in openRuyi
|
||||
%bcond system_tensorpipe 0
|
||||
|
||||
Name: python-%{srcname}
|
||||
Version: %{pypi_version}
|
||||
Release: %autorelease
|
||||
Summary: PyTorch AI/ML framework
|
||||
# See license.txt for license details
|
||||
License: BSD-3-Clause AND BSD-2-Clause AND 0BSD AND Apache-2.0 AND MIT AND BSL-1.0 AND GPL-3.0-or-later AND Zlib
|
||||
URL: https://pytorch.org/
|
||||
#!RemoteAsset: sha256:52872a6bbdc42334b00051d88a92f801cfd9be730abdd2b37a2d08996f53bb29
|
||||
Source0: https://github.com/pytorch/pytorch/archive/refs/tags/v%{version}.tar.gz
|
||||
%if %{without system_flatbuffers}
|
||||
%global flatbuffers_version 24.12.23
|
||||
#!RemoteAsset: sha256:7e2ef35f1af9e2aa0c6a7d0a09298c2cb86caf3d4f58c0658b306256e5bcab10
|
||||
Source1: https://github.com/google/flatbuffers/archive/refs/tags/v%{flatbuffers_version}.tar.gz
|
||||
%endif
|
||||
%if %{without system_tensorpipe}
|
||||
# Development on tensorpipe has stopped; the repo was made read-only on July 1, 2023, and this is the last commit
|
||||
%global tp_commit 2b4cd91092d335a697416b2a3cb398283246849d
|
||||
%global tp_scommit 2b4cd91
|
||||
#!RemoteAsset: sha256:0e85ca56bfe25ed7b3026d2784f716eb10ed1328ade346e3a252814752c57eeb
|
||||
Source2: https://github.com/pytorch/tensorpipe/archive/%{tp_commit}/tensorpipe-%{tp_scommit}.tar.gz
|
||||
# The old libuv tensorpipe uses
|
||||
#!RemoteAsset: sha256:6cfeb5f4bab271462b4a2cc77d4ecec847fdbdc26b72019c27ae21509e6f94fa
|
||||
Source3: https://github.com/libuv/libuv/archive/refs/tags/v1.41.0.tar.gz
|
||||
# Development on libnop has stopped as far as we know; this is the last commit
|
||||
%global nop_commit 910b55815be16109f04f4180e9adee14fb4ce281
|
||||
%global nop_scommit 910b558
|
||||
#!RemoteAsset: sha256:ec3604671f8ea11aed9588825f9098057ebfef7a8908e97459835150eea9f63a
|
||||
Source4: https://github.com/google/libnop/archive/%{nop_commit}/libnop-%{nop_scommit}.tar.gz
|
||||
%endif
|
||||
|
||||
%if %{without system_httplib}
|
||||
%global hl_commit 4d7c9a788de136071ccf0dd4e96239151e2adadb
|
||||
%global hl_scommit 4d7c9a7
|
||||
#!RemoteAsset: sha256:8ecb7bbe844f9b4a1418b8a015d0f815d021d2c0d53291387122cb510c8783ef
|
||||
Source5: https://github.com/yhirose/cpp-httplib/archive/%{hl_commit}/cpp-httplib-%{hl_scommit}.tar.gz
|
||||
%endif
|
||||
%if %{without system_kineto}
|
||||
%global ki_commit 23b5bb5764b3dec988e25c52098407e508d84bb4
|
||||
%global ki_scommit 23b5bb5
|
||||
#!RemoteAsset: sha256:5b85352628319e22c48b589d2f423f3761479058f87a3ecc328818f16e4394c6
|
||||
Source6: https://github.com/pytorch/kineto/archive/%{ki_commit}/kineto-%{ki_scommit}.tar.gz
|
||||
%endif
|
||||
|
||||
%global mslk_commit 3d332d1c0c0ac7765852c97b3979c9ef913e037f
|
||||
%global mslk_scommit 3d332d1
|
||||
#!RemoteAsset: sha256:1944e67d1baeffef3bb8f89793ea06e0f05b88aac4d5cd89b4558a21aca6754b
|
||||
Source7: https://github.com/meta-pytorch/MSLK/archive/%{mslk_commit}/MSLK-%{mslk_scommit}.tar.gz
|
||||
|
||||
# pytorch upstream issue #173707: libtorch_hip.so references the
|
||||
# const_data_ptr / mutable_data_ptr / data_ptr template family with a
|
||||
# different (non-SFINAE) mangling than libtorch_cpu.so exports.
|
||||
# Appended to aten/src/ATen/core/Tensor.cpp in %prep when rocm is enabled.
|
||||
Source8: pytorch-rocm-symbol-bridge.cpp
|
||||
|
||||
# Fix magma version encoding
|
||||
# https://github.com/pytorch/pytorch/pull/180388
|
||||
Patch0: 0001-pytorch-magma-2.10.0-version-encoding.patch
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: cmake(concurrentqueue)
|
||||
BuildRequires: cmake(sleef)
|
||||
BuildRequires: cpuinfo
|
||||
# Although eigen3 is enabled on openRuyi, it cannot be detected at configure time
|
||||
# TODO: Fix this
|
||||
BuildRequires: eigen3
|
||||
BuildRequires: foxi-devel
|
||||
BuildRequires: libomp-devel
|
||||
BuildRequires: ninja
|
||||
BuildRequires: pkgconfig(fmt)
|
||||
BuildRequires: pkgconfig(nlohmann_json)
|
||||
BuildRequires: pkgconfig(numa)
|
||||
BuildRequires: pkgconfig(openblas64)
|
||||
BuildRequires: pkgconfig(protobuf)
|
||||
BuildRequires: pkgconfig(valgrind)
|
||||
BuildRequires: pocketfft-devel
|
||||
BuildRequires: pthreadpool-devel
|
||||
BuildRequires: fp16-devel
|
||||
BuildRequires: fxdiv-devel
|
||||
BuildRequires: psimd-devel
|
||||
BuildRequires: xnnpack-devel = 0+git20260211.312eb7e
|
||||
BuildRequires: pkgconfig(python3)
|
||||
BuildRequires: python3dist(filelock)
|
||||
BuildRequires: python3dist(jinja2)
|
||||
BuildRequires: python3dist(networkx)
|
||||
BuildRequires: python3dist(numpy)
|
||||
BuildRequires: python3dist(pip)
|
||||
BuildRequires: python3dist(pybind11)
|
||||
BuildRequires: python3dist(pyyaml)
|
||||
BuildRequires: python3dist(setuptools)
|
||||
BuildRequires: python3dist(sympy)
|
||||
BuildRequires: python3dist(typing-extensions)
|
||||
|
||||
%if %{with system_httplib}
|
||||
BuildRequires: cmake(httplib)
|
||||
%endif
|
||||
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: libstdc++-devel
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: cmake(LLVM)
|
||||
BuildRequires: lld
|
||||
|
||||
BuildRequires: cmake(ONNX)
|
||||
BuildRequires: cmake(onnxruntime)
|
||||
|
||||
%if %{with mpi}
|
||||
BuildRequires: openmpi-devel
|
||||
%endif
|
||||
|
||||
%if %{with system_flatbuffers}
|
||||
BuildRequires: pkgconfig(flatbuffers)
|
||||
%endif
|
||||
|
||||
%if %{with rocm}
|
||||
BuildRequires: cmake(hipblas)
|
||||
BuildRequires: cmake(hipblaslt)
|
||||
BuildRequires: cmake(hipcub)
|
||||
BuildRequires: cmake(hipfft)
|
||||
BuildRequires: cmake(hiprand)
|
||||
BuildRequires: cmake(hipsparse)
|
||||
BuildRequires: cmake(hipsparselt)
|
||||
BuildRequires: cmake(hipsolver)
|
||||
BuildRequires: cmake(miopen)
|
||||
BuildRequires: cmake(rocblas)
|
||||
BuildRequires: cmake(rocrand)
|
||||
BuildRequires: cmake(rocfft)
|
||||
BuildRequires: cmake(rccl)
|
||||
BuildRequires: cmake(rocprim)
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(rocm-core)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(rocsolver)
|
||||
BuildRequires: cmake(rocm_smi)
|
||||
BuildRequires: cmake(rocthrust)
|
||||
BuildRequires: pkgconfig(magma)
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-llvm-macros
|
||||
BuildRequires: roctracer-devel
|
||||
%endif
|
||||
|
||||
Requires: python3dist(dill)
|
||||
Requires: python3dist(pyyaml)
|
||||
%if %{with rocm}
|
||||
Requires: amdsmi
|
||||
%endif
|
||||
|
||||
# By convention
|
||||
Provides: pytorch = %{version}-%{release}
|
||||
Provides: python3-%{srcname} = %{version}-%{release}
|
||||
Provides: python3-%{srcname}%{?_isa} = %{version}-%{release}
|
||||
%python_provide python3-%{srcname}
|
||||
|
||||
%description
|
||||
PyTorch is a Python package that provides two high-level features:
|
||||
|
||||
* Tensor computation (like NumPy) with strong GPU acceleration
|
||||
* Deep neural networks built on a tape-based autograd system
|
||||
|
||||
You can reuse your favorite Python packages such as NumPy, SciPy,
|
||||
and Cython to extend PyTorch when needed.
|
||||
|
||||
%prep
|
||||
%autosetup -p1 -n pytorch-%{version}
|
||||
|
||||
# GitHub release tarballs identify the version as an alpha, so replace that
|
||||
echo "%{pypi_version}" > version.txt
|
||||
|
||||
# Remove bundled egg-info
|
||||
rm -rf %{srcname}.egg-info
|
||||
|
||||
%if %{without system_flatbuffers}
|
||||
tar xf %{SOURCE1}
|
||||
rm -rf third_party/flatbuffers/*
|
||||
cp -r flatbuffers-%{flatbuffers_version}/* third_party/flatbuffers/
|
||||
%endif
|
||||
|
||||
%if %{without system_tensorpipe}
|
||||
tar xf %{SOURCE2}
|
||||
rm -rf third_party/tensorpipe/*
|
||||
cp -r tensorpipe-*/* third_party/tensorpipe/
|
||||
tar xf %{SOURCE3}
|
||||
rm -rf third_party/tensorpipe/third_party/libuv/*
|
||||
cp -r libuv-*/* third_party/tensorpipe/third_party/libuv/
|
||||
tar xf %{SOURCE4}
|
||||
rm -rf third_party/tensorpipe/third_party/libnop/*
|
||||
cp -r libnop-*/* third_party/tensorpipe/third_party/libnop/
|
||||
|
||||
# GCC 15 needs an explicit <cstdint> include
|
||||
sed -i '/#include <tensorpipe.*/a#include <cstdint>' third_party/tensorpipe/tensorpipe/common/allocator.h
|
||||
sed -i '/#include <tensorpipe.*/a#include <cstdint>' third_party/tensorpipe/tensorpipe/common/memory.h
|
||||
%endif
|
||||
|
||||
%if %{without system_httplib}
|
||||
tar xf %{SOURCE5}
|
||||
rm -rf third_party/cpp-httplib/*
|
||||
cp -r cpp-httplib-*/* third_party/cpp-httplib/
|
||||
%endif
|
||||
|
||||
%if %{without system_kineto}
|
||||
tar xf %{SOURCE6}
|
||||
rm -rf third_party/kineto/*
|
||||
cp -r kineto-*/* third_party/kineto/
|
||||
%endif
|
||||
|
||||
tar xf %{SOURCE7}
|
||||
rm -rf third_party/mslk/*
|
||||
cp -r MSLK-*/* third_party/mslk/
|
||||
|
||||
# Adjust for amd gpu targets currently supported
|
||||
# only gfx1100 supported on openruyi
|
||||
sed -i -e 's@"gfx90a", "gfx942",@@' aten/src/ATen/native/cuda/Blas.cpp
|
||||
sed -i -e 's@"gfx1100", "gfx1101", "gfx1200", "gfx1201", "gfx908"@"gfx1100", "gfx1101",@' aten/src/ATen/native/cuda/Blas.cpp
|
||||
sed -i -e 's@"gfx950", "gfx1150", "gfx1151"@@' aten/src/ATen/native/cuda/Blas.cpp
|
||||
|
||||
# Need to pip this
|
||||
sed -i -e '/fsspec/d' setup.py
|
||||
|
||||
# Use system sympy
|
||||
sed -i -e 's@sympy==1.13.1@sympy>=1.13.1@' setup.py
|
||||
|
||||
# A new dependency
|
||||
# Connected to USE_FLASH_ATTENTION, since this is off, do not need it
|
||||
sed -i -e '/aotriton.cmake/d' cmake/Dependencies.cmake
|
||||
# Extend the HIP compiler flags in cmake/Dependencies.cmake.
# Every sed below matches the same anchor text "HIP_CLANG_FLAGS -fno-gpu-rdc"
# and writes the anchor back followed by one extra flag. Because the anchor
# survives each substitution, the next sed matches again and inserts its flag
# directly after -fno-gpu-rdc (so the flags end up in reverse order of the
# commands below). A sed whose anchor is not found is silently a no-op.
# Compress hip
|
||||
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc --offload-compress@' cmake/Dependencies.cmake
|
||||
# Silence noisy warnings
|
||||
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-pass-failed@' cmake/Dependencies.cmake
|
||||
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-command-line-argument@' cmake/Dependencies.cmake
|
||||
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-unused-result@' cmake/Dependencies.cmake
|
||||
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -Wno-deprecated-declarations@' cmake/Dependencies.cmake
|
||||
# Fix: error: branch size exceeds simm16 (AMDGPUAsmBackend.cpp)
|
||||
# Both -amdgpu-s-branch-bits=15 (default is 16) and -amdgpu-long-branch-factor=2
|
# are needed to avoid the 'branch size exceeds simm16' error; each is passed
# to LLVM through its own -mllvm option.
||||
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -mllvm --amdgpu-s-branch-bits=15@' cmake/Dependencies.cmake
|
||||
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc -mllvm --amdgpu-long-branch-factor=2@' cmake/Dependencies.cmake
|
||||
|
||||
# Use parallel jobs for GPU offload compilation (fixed at 8 jobs)
|
||||
sed -i -e 's@HIP_CLANG_FLAGS -fno-gpu-rdc@HIP_CLANG_FLAGS -fno-gpu-rdc --offload-jobs=8@' cmake/Dependencies.cmake
|
||||
# Need to link with librocm_smi64 (intra_node_comm.cpp calls rsmi_init /
|
||||
# rsmi_is_P2P_accessible). The target string is "hiprtc::hiprtc" — the previous
|
||||
# pattern "hipzrtc::hiprtc" had a stray 'z' so the sed was a no-op and
|
||||
# libtorch_hip.so ended up with an undefined rsmi_init symbol.
|
||||
sed -i -e 's@hiprtc::hiprtc@hiprtc::hiprtc rocm_smi64@' cmake/Dependencies.cmake
|
||||
|
||||
# No third_party fmt, use system
|
||||
sed -i -e 's@fmt::fmt-header-only@fmt@' CMakeLists.txt
|
||||
sed -i -e 's@fmt::fmt-header-only@fmt@' aten/src/ATen/CMakeLists.txt
|
||||
sed -i -e 's@list(APPEND ATen_HIP_INCLUDE $<TARGET_PROPERTY:fmt,INTERFACE_INCLUDE_DIRECTORIES>)@@' aten/src/ATen/CMakeLists.txt
|
||||
|
||||
sed -i -e 's@fmt::fmt-header-only@fmt@' third_party/kineto/libkineto/CMakeLists.txt
|
||||
sed -i -e 's@fmt::fmt-header-only@fmt@' c10/CMakeLists.txt
|
||||
sed -i -e 's@fmt::fmt-header-only@fmt@' torch/CMakeLists.txt
|
||||
sed -i -e 's@fmt::fmt-header-only@fmt@' cmake/Dependencies.cmake
|
||||
sed -i -e 's@fmt::fmt-header-only@fmt@' caffe2/CMakeLists.txt
|
||||
|
||||
sed -i -e 's@add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@#add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)@' cmake/Dependencies.cmake
|
||||
sed -i -e 's@set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@#set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")@' cmake/Dependencies.cmake
|
||||
sed -i -e 's@list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@#list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)@' cmake/Dependencies.cmake
|
||||
|
||||
# No third_party FXdiv
|
||||
sed -i -e 's@if(NOT TARGET fxdiv)@if(MSVC AND USE_XNNPACK)@' caffe2/CMakeLists.txt
|
||||
sed -i -e 's@TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@#TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)@' caffe2/CMakeLists.txt
|
||||
|
||||
# https://github.com/pytorch/pytorch/issues/149803
|
||||
# Tries to checkout nccl
|
||||
sed -i -e 's@ checkout_nccl()@ True@' tools/build_pytorch_libs.py
|
||||
|
||||
# Disable the check_submodules() call in setup.py: we build from a tarball, not a git repo
|
||||
sed -i -e 's@check_submodules()$@#check_submodules()@' setup.py
|
||||
|
||||
# Release comes fully loaded with third party src
|
||||
# Remove what we can
|
||||
#
|
||||
# For 2.1 this is all but miniz-2.1.0
|
||||
# Instead of building as a library, caffe2 reaches into
|
||||
# the third_party dir to compile the file.
|
||||
# miniz is licensed MIT
|
||||
# https://github.com/richgel999/miniz/blob/master/LICENSE
|
||||
mv third_party/miniz-%{miniz_version} .
|
||||
#
|
||||
# setup.py depends on this script
|
||||
mv third_party/build_bundled.py .
|
||||
|
||||
%if %{without system_flatbuffers}
|
||||
# Need the just untarred flatbuffers/flatbuffers.h
|
||||
mv third_party/flatbuffers .
|
||||
%endif
|
||||
|
||||
%if %{without system_tensorpipe}
|
||||
mv third_party/tensorpipe .
|
||||
%endif
|
||||
|
||||
%if %{without system_httplib}
|
||||
mv third_party/cpp-httplib .
|
||||
%endif
|
||||
|
||||
%if %{without system_kineto}
|
||||
mv third_party/kineto .
|
||||
%endif
|
||||
|
||||
mv third_party/mslk .
|
||||
|
||||
# Remove everything
|
||||
rm -rf third_party/*
|
||||
# Put stuff back
|
||||
mv build_bundled.py third_party
|
||||
mv miniz-%{miniz_version} third_party
|
||||
|
||||
%if %{without system_flatbuffers}
|
||||
mv flatbuffers third_party
|
||||
%endif
|
||||
|
||||
%if %{without system_tensorpipe}
|
||||
mv tensorpipe third_party
|
||||
%endif
|
||||
|
||||
%if %{without system_httplib}
|
||||
mv cpp-httplib third_party
|
||||
%endif
|
||||
|
||||
%if %{without system_kineto}
|
||||
mv kineto third_party
|
||||
%endif
|
||||
|
||||
mv mslk third_party
|
||||
|
||||
# Fake out pocketfft, and system header will be used
|
||||
mkdir third_party/pocketfft
|
||||
# Copy from the distro include dir macro, matching the valgrind header copy below.
cp %{_includedir}/pocketfft_hdronly.h third_party/pocketfft/
|
||||
|
||||
# Use the system valgrind headers
|
||||
mkdir third_party/valgrind-headers
|
||||
cp %{_includedir}/valgrind/* third_party/valgrind-headers
|
||||
|
||||
# Fix installing to /usr/lib64
|
||||
sed -i -e 's@DESTINATION ${PYTHON_LIB_REL_PATH}@DESTINATION ${CMAKE_INSTALL_PREFIX}/${PYTHON_LIB_REL_PATH}@' caffe2/CMakeLists.txt
|
||||
|
||||
# Disable foxi linking: comment out the foxi_loader dependency (third_party/foxi is removed above)
|
||||
sed -i -e 's@list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)@#list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)@' cmake/Dependencies.cmake
|
||||
|
||||
%if %{without system_tensorpipe}
|
||||
# cmake version changed
|
||||
sed -i -e 's@cmake_minimum_required(VERSION 3.4)@cmake_minimum_required(VERSION 3.5)@' third_party/tensorpipe/third_party/libuv/CMakeLists.txt
|
||||
sed -i -e 's@cmake_minimum_required(VERSION 3.4)@cmake_minimum_required(VERSION 3.5)@' libuv*/CMakeLists.txt
|
||||
%endif
|
||||
|
||||
%if %{with rocm}
|
||||
# Fix: hipOccupancyMaxActiveBlocksPerMultiprocessor is overloaded in new ROCm,
|
||||
# force using hipModuleOccupancyMaxActiveBlocksPerMultiprocessor
|
||||
sed -i -e 's/TORCH_HIP_VERSION < 305/TORCH_HIP_VERSION < 305 \&\& TORCH_HIP_VERSION > 0/' \
|
||||
aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.h
|
||||
# pytorch upstream issue #173707 (gemm/bgemm variant):
|
||||
# clang 21 mangles the instantiation-dependent SFINAE non-type template parameter
|
||||
# typename std::enable_if<...,Dtype>::type* = nullptr
|
||||
# of at::cuda::blas::gemm/bgemm differently at an explicit specialization (the
|
||||
# definition, Tn...enable_if form) than at a deduced call site (the reference,
|
||||
# ...IffLPf0E... form), so libtorch_hip.so fails to dlopen with e.g.
|
||||
# undefined symbol: _ZN2at4cuda4blas4gemmIffLPf0EEEvcclllNS_10OpMathTypeIT_E4typeEPKS5_lS9_lS7_PT0_l
|
||||
# Every real dtype is provided by an explicit specialization, so the SFINAE guard
|
||||
# is redundant: drop it so the two overloads collapse to one primary template and
|
||||
# clang emits a single consistent mangling everywhere. Must run before hipify.
|
||||
sed -i \
|
||||
-e 's/, typename std::enable_if<!CUDABLAS_GEMM_DTYPE_IS_FLOAT_TYPE_AND_C_DTYPE_IS_FLOAT, Dtype>::type\* = nullptr>/>/g' \
|
||||
-e 's/, typename std::enable_if<CUDABLAS_GEMM_DTYPE_IS_FLOAT_TYPE_AND_C_DTYPE_IS_FLOAT, Dtype>::type\* = nullptr>/>/g' \
|
||||
aten/src/ATen/cuda/CUDABlas.h
|
||||
# hipify
|
||||
./tools/amd_build/build_amd.py
|
||||
# use any hip, correct CMAKE_MODULE_PATH
|
||||
sed -i -e 's@lib/cmake/hip@lib64/cmake/hip@' cmake/public/LoadHIP.cmake
|
||||
sed -i -e 's@HIP 1.0@HIP MODULE@' cmake/public/LoadHIP.cmake
|
||||
# silence an assert
|
||||
# sed -i -e '/qvalue = std::clamp(qvalue, qmin, qmax);/d' aten/src/ATen/native/cuda/IndexKernel.cu
|
||||
|
||||
# Append ROCm symbol bridge — see Source8 header for full context.
|
||||
# Without this, libtorch_hip.so dlopen fails on:
|
||||
# undefined symbol: _ZNK2at10TensorBase14const_data_ptrI*Li0EEEPK*v
|
||||
cat %{SOURCE8} >> aten/src/ATen/core/Tensor.cpp
|
||||
%endif
|
||||
|
||||
# moodycamel include path needs adjusting to use the system's
|
||||
# Redirect the bundled concurrentqueue include path to the system copy under the distro include dir.
sed -i -e 's@${PROJECT_SOURCE_DIR}/third_party/concurrentqueue@%{_includedir}/concurrentqueue@' cmake/Dependencies.cmake
|
||||
|
||||
%build
|
||||
# Control the number of jobs
|
||||
# The build can fail if too many threads exceed the physical memory
|
||||
# Run at least two jobs (the floor enforced below), more if CPU & memory resources are available.
|
||||
COMPILE_JOBS=`nproc`
|
||||
if [ ${COMPILE_JOBS}x = x ]; then
|
||||
COMPILE_JOBS=1
|
||||
fi
|
||||
# Take into account memory usage per core, do not thrash real memory
|
||||
# TraceType/VariableType files can consume 4GB+ per compilation unit
|
||||
# Use a more conservative estimate: 4GB per job for safety
|
||||
BUILD_MEM=4
|
||||
MEM_KB=0
|
||||
# Total physical memory in kB: awk reads /proc/meminfo directly and matches only the MemTotal line.
MEM_KB=`awk '/^MemTotal:/ { print $2 }' /proc/meminfo`
|
||||
MEM_MB=`eval "expr ${MEM_KB} / 1024"`
|
||||
MEM_GB=`eval "expr ${MEM_MB} / 1024"`
|
||||
COMPILE_JOBS_MEM=`eval "expr 1 + ${MEM_GB} / ${BUILD_MEM}"`
|
||||
if [ "$COMPILE_JOBS_MEM" -lt "$COMPILE_JOBS" ]; then
|
||||
COMPILE_JOBS=$COMPILE_JOBS_MEM
|
||||
fi
|
||||
# Ensure at least 2 jobs to avoid single-threading the large files
|
||||
if [ "$COMPILE_JOBS" -lt 2 ]; then
|
||||
COMPILE_JOBS=2
|
||||
fi
|
||||
export MAX_JOBS=$COMPILE_JOBS
|
||||
|
||||
# For verbose cmake output
|
||||
# export VERBOSE=ON
|
||||
# For verbose linking
|
||||
# export CMAKE_SHARED_LINKER_FLAGS=-Wl,--verbose
|
||||
|
||||
# Manually set this hardening flag
|
||||
export CMAKE_EXE_LINKER_FLAGS=-pie
|
||||
export BUILD_CUSTOM_PROTOBUF=OFF
|
||||
export BUILD_NVFUSER=OFF
|
||||
export BUILD_SHARED_LIBS=ON
|
||||
export BUILD_TEST=OFF
|
||||
# Use Release instead of RelWithDebInfo to reduce compile time and memory
|
||||
# for huge generated files like TraceType/VariableType (saves ~30% compile time)
|
||||
export CMAKE_BUILD_TYPE=Release
|
||||
export CMAKE_FIND_PACKAGE_PREFER_CONFIG=ON
|
||||
export CAFFE2_LINK_LOCAL_PROTOBUF=OFF
|
||||
export INTERN_BUILD_MOBILE=OFF
|
||||
export USE_DISTRIBUTED=OFF
|
||||
export USE_CUDA=OFF
|
||||
export USE_FAKELOWP=OFF
|
||||
export USE_FBGEMM=OFF
|
||||
export USE_FLASH_ATTENTION=OFF
|
||||
export USE_GLOO=OFF
|
||||
export USE_ITT=OFF
|
||||
export USE_KINETO=OFF
|
||||
export USE_KLEIDIAI=OFF
|
||||
export USE_LITE_INTERPRETER_PROFILER=OFF
|
||||
export USE_LITE_PROTO=OFF
|
||||
export USE_MAGMA=OFF
|
||||
export USE_MEM_EFF_ATTENTION=OFF
|
||||
export USE_MKLDNN=OFF
|
||||
export USE_MPI=OFF
|
||||
export USE_MSLK=OFF
|
||||
export USE_NCCL=OFF
|
||||
export USE_NNPACK=OFF
|
||||
export USE_NUMPY=ON
|
||||
export USE_OPENMP=ON
|
||||
export USE_PYTORCH_QNNPACK=OFF
|
||||
export USE_ROCM=OFF
|
||||
export USE_SYSTEM_SLEEF=ON
|
||||
export USE_SYSTEM_EIGEN_INSTALL=ON
|
||||
export USE_SYSTEM_ONNX=ON
|
||||
export USE_SYSTEM_PYBIND11=ON
|
||||
export USE_SYSTEM_LIBS=OFF
|
||||
export USE_SYSTEM_NCCL=OFF
|
||||
export USE_XNNPACK=OFF
|
||||
export USE_XPU=OFF
|
||||
export USE_SYSTEM_PTHREADPOOL=ON
|
||||
export USE_SYSTEM_CPUINFO=ON
|
||||
export USE_SYSTEM_FP16=ON
|
||||
export USE_SYSTEM_FXDIV=ON
|
||||
export USE_SYSTEM_PSIMD=ON
|
||||
export USE_SYSTEM_XNNPACK=OFF
|
||||
# Overrides the USE_DISTRIBUTED=OFF exported earlier in this section; the later export wins.
export USE_DISTRIBUTED=ON
|
||||
export USE_TENSORPIPE=ON
|
||||
%if %{without system_tensorpipe}
|
||||
export TP_BUILD_LIBUV=OFF
|
||||
%endif
|
||||
|
||||
%if %{with mpi}
|
||||
export USE_MPI=ON
|
||||
%endif
|
||||
|
||||
%if %{with rocm}
|
||||
export USE_ROCM=ON
|
||||
export USE_ROCM_CK_SDPA=OFF
|
||||
export USE_ROCM_CK_GEMM=OFF
|
||||
export USE_FBGEMM_GENAI=OFF
|
||||
|
||||
export USE_MAGMA=ON
|
||||
export HIP_PATH=`hipconfig -p`
|
||||
export ROCM_PATH=`hipconfig -R`
|
||||
|
||||
# pytorch uses clang, not hipcc
|
||||
export HIP_CLANG_PATH=%{rocmllvm_bindir}
|
||||
export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default}
|
||||
|
||||
export CMAKE_NO_SYSTEM_FROM_IMPORTED=ON
|
||||
|
||||
# export CMAKE_BUILD_TYPE=Debug
|
||||
%endif
|
||||
|
||||
export CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES="/usr/include"
|
||||
export CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES="/usr/include"
|
||||
|
||||
export LDFLAGS="-fuse-ld=lld %{?__global_ldflags}"
|
||||
export CMAKE_LIBRARY_PATH=/usr/lib64
|
||||
# Point CMake at the active Python's site-packages via the RPM macro rather than a hard-coded 3.13 path,
# so the search path stays valid when the distro Python version changes.
export CMAKE_PREFIX_PATH="/usr:/usr/lib64/cmake:%{python3_sitelib}"
|
||||
|
||||
%pyproject_wheel
|
||||
|
||||
%install
|
||||
%if %{with rocm}
|
||||
export USE_ROCM=ON
|
||||
export USE_ROCM_CK=OFF
|
||||
export HIP_PATH=`hipconfig -p`
|
||||
export ROCM_PATH=`hipconfig -R`
|
||||
|
||||
# pytorch uses clang, not hipcc
|
||||
export HIP_CLANG_PATH=%{rocmllvm_bindir}
|
||||
export PYTORCH_ROCM_ARCH=%{rocm_gpu_list_default}
|
||||
%endif
|
||||
|
||||
%pyproject_install
|
||||
%pyproject_save_files '*torch*'
|
||||
|
||||
%check
|
||||
# Not working yet
|
||||
|
||||
%files
|
||||
%license LICENSE
|
||||
%doc README.md
|
||||
%{_bindir}/torchrun
|
||||
%{python3_sitearch}/%{srcname}*
|
||||
%{python3_sitearch}/functorch
|
||||
|
||||
%changelog
|
||||
%{?autochangelog}
|
||||
@@ -0,0 +1,183 @@
|
||||
// === openRuyi ROCm symbol bridge for pytorch 2.11 / clang 21 ===
|
||||
//
|
||||
// Appended to aten/src/ATen/core/Tensor.cpp by python-torch.spec.
|
||||
//
|
||||
// Background:
|
||||
// libtorch_hip.so references the TensorBase data-pointer template family
|
||||
// (const_data_ptr / mutable_data_ptr / data_ptr) using a NON-SFINAE
|
||||
// mangling form (...Li0EEE... — only the non-type template parameter
|
||||
// value is encoded). The explicit specialisations emitted from
|
||||
// TensorMethods.cpp into libtorch_cpu.so may or may not be mangled
|
||||
// the same way depending on clang's handling of SFINAE NTTPs in this
|
||||
// specific clang/ROCm/arch combination (we have observed both: cpu
|
||||
// exporting the Li0E form, and cpu only exporting the Tn...enable_if
|
||||
// form). No link-time error in the latter case thanks to lld's
|
||||
// --allow-shlib-undefined → runtime dlopen reports
|
||||
// "undefined symbol: _ZNK2at10TensorBase14const_data_ptrI*Li0EEEPK*v"
|
||||
// when libtorch_hip.so is loaded.
|
||||
//
|
||||
// Reference: https://github.com/pytorch/pytorch/issues/173707
|
||||
// (closed as not planned; pytorch treats this as a clang/ROCm gap)
|
||||
//
|
||||
// Bridge strategy:
|
||||
// Provide every plausibly-missing mangling as a weak free function
|
||||
// linked into libtorch_cpu.so with default visibility. Where the cpp
|
||||
// specialisation already provides the same mangled name as a strong
|
||||
// symbol, the linker discards the bridge weak symbol and uses the
|
||||
// cpp version (which preserves the runtime check_type call). Where
|
||||
// the cpp does NOT emit the same mangling, the bridge weak symbol
|
||||
// fills the gap so libtorch_hip.so dlopen resolves. Each bridge body
|
||||
// delegates to the non-templated public accessor on TensorBase,
|
||||
// which returns the raw underlying data pointer.
|
||||
//
|
||||
// Semantics note:
|
||||
// The non-templated TensorBase::const_data_ptr() / mutable_data_ptr() /
|
||||
// data_ptr() skip the scalar-type runtime check that the templated
|
||||
// specialisations perform. In practice this only matters for HIP code
|
||||
// paths that already dispatched on dtype before reaching this call —
|
||||
// which is the common case in ATen kernels.
|
||||
//
|
||||
// gemm stubs from the upstream issue are intentionally NOT included:
|
||||
// `nm -DC --undefined-only libtorch_hip.so` on this build shows no
|
||||
// undefined at::cuda::blas::gemm symbols, and the upstream stubs have
|
||||
// empty bodies (silent functional failure if ever called).
|
||||
|
||||
#include <ATen/core/TensorBase.h>
|
||||
#include <c10/util/BFloat16.h>
|
||||
#include <c10/util/Float8_e4m3fn.h>
|
||||
#include <c10/util/Float8_e4m3fnuz.h>
|
||||
#include <c10/util/Float8_e5m2.h>
|
||||
#include <c10/util/Float8_e5m2fnuz.h>
|
||||
#include <c10/util/Float8_e8m0fnu.h>
|
||||
#include <c10/util/Half.h>
|
||||
#include <c10/util/complex.h>
|
||||
#include <c10/util/qint8.h>
|
||||
#include <c10/util/qint32.h>
|
||||
#include <c10/util/quint8.h>
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma GCC visibility push(default)
|
||||
|
||||
extern "C" {
|
||||
|
||||
// `weak` is essential: the cpp's TensorMethods.cpp specialisation emits
|
||||
// the same mangled name as a strong global on some build configurations.
|
||||
// Without `weak` the link step fails with a duplicate-symbol error.
|
||||
#define BRIDGE_READ(MangledName) \
|
||||
__attribute__((weak, visibility("default"))) \
|
||||
const void* MangledName(const at::TensorBase* t) { return t->const_data_ptr(); }
|
||||
|
||||
#define BRIDGE_WRITE(MangledName) \
|
||||
__attribute__((weak, visibility("default"))) \
|
||||
void* MangledName(const at::TensorBase* t) { return t->mutable_data_ptr(); }
|
||||
|
||||
// ---- const_data_ptr<T, 0> (non-const T, plain Li0E form) ----
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIaLi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIbLi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIdLi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIfLi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIhLi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIiLi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIjLi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIlLi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrImLi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIsLi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrItLi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIN3c104HalfELi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIN3c108BFloat16ELi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIN3c107complexIdEELi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIN3c107complexIfEELi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIN3c107complexINS2_4HalfEEELi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIN3c1011Float8_e5m2ELi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIN3c1013Float8_e4m3fnELi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIN3c1014Float8_e8m0fnuELi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIN3c1015Float8_e4m3fnuzELi0EEEPKT_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIN3c1015Float8_e5m2fnuzELi0EEEPKT_v)
|
||||
|
||||
// ---- const_data_ptr<KT, 0> (const-qualified T, plain Li0E form) ----
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKaLi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKbLi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKdLi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKfLi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKhLi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKiLi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKjLi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKlLi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKmLi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKsLi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKtLi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKN3c104HalfELi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKN3c108BFloat16ELi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKN3c107complexIdEELi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKN3c107complexIfEELi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKN3c107complexINS2_4HalfEEELi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKN3c105qint8ELi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKN3c106qint32ELi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKN3c106quint8ELi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKN3c1011Float8_e5m2ELi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKN3c1013Float8_e4m3fnELi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKN3c1014Float8_e8m0fnuELi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKN3c1015Float8_e4m3fnuzELi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIKN3c1015Float8_e5m2fnuzELi0EEEPKNSt12remove_constIT_E4typeEv)
|
||||
|
||||
// ---- const_data_ptr<T, Tn enable_if<!is_const_v<T>>... 0> (SFINAE form, non-const T) ----
|
||||
// libtorch_hip.so emits these for a few primitives even when most TUs use the Li0E form.
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIdTnNSt9enable_ifIXntsr3stdE10is_const_vIT_EEiE4typeELi0EEEPKS3_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIfTnNSt9enable_ifIXntsr3stdE10is_const_vIT_EEiE4typeELi0EEEPKS3_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIiTnNSt9enable_ifIXntsr3stdE10is_const_vIT_EEiE4typeELi0EEEPKS3_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIlTnNSt9enable_ifIXntsr3stdE10is_const_vIT_EEiE4typeELi0EEEPKS3_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIN3c104HalfETnNSt9enable_ifIXntsr3stdE10is_const_vIT_EEiE4typeELi0EEEPKS5_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIN3c108BFloat16ETnNSt9enable_ifIXntsr3stdE10is_const_vIT_EEiE4typeELi0EEEPKS5_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIN3c107complexIdEETnNSt9enable_ifIXntsr3stdE10is_const_vIT_EEiE4typeELi0EEEPKS6_v)
|
||||
BRIDGE_READ(_ZNK2at10TensorBase14const_data_ptrIN3c107complexIfEETnNSt9enable_ifIXntsr3stdE10is_const_vIT_EEiE4typeELi0EEEPKS6_v)
|
||||
|
||||
// ---- mutable_data_ptr<T> ----
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIaEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIbEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIdEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIfEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIhEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIiEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIjEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIlEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrImEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIsEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrItEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIN3c104HalfEEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIN3c108BFloat16EEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIN3c107complexIdEEEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIN3c107complexIfEEEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIN3c107complexINS2_4HalfEEEEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIN3c1011Float8_e5m2EEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIN3c1013Float8_e4m3fnEEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIN3c1014Float8_e8m0fnuEEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIN3c1015Float8_e4m3fnuzEEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIN3c1015Float8_e5m2fnuzEEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIN3c105qint8EEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIN3c106qint32EEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase16mutable_data_ptrIN3c106quint8EEEPT_v)
|
||||
|
||||
// ---- data_ptr<T> (legacy mutable accessor) ----
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase8data_ptrIaEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase8data_ptrIbEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase8data_ptrIdEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase8data_ptrIfEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase8data_ptrIhEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase8data_ptrIiEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase8data_ptrIlEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase8data_ptrIsEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase8data_ptrIN3c104HalfEEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase8data_ptrIN3c108BFloat16EEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase8data_ptrIN3c107complexIdEEEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase8data_ptrIN3c107complexIfEEEEPT_v)
|
||||
BRIDGE_WRITE(_ZNK2at10TensorBase8data_ptrIN3c107complexINS2_4HalfEEEEEPT_v)
|
||||
|
||||
#undef BRIDGE_READ
|
||||
#undef BRIDGE_WRITE
|
||||
|
||||
} // extern "C"
|
||||
|
||||
#pragma GCC visibility pop
|
||||
#pragma GCC diagnostic pop
|
||||
// === openRuyi ROCm symbol bridge end ===
|
||||
@@ -0,0 +1,44 @@
|
||||
diff --git a/setup.py b/setup.py
|
||||
index fe4a78d..788dc1c 100644
|
||||
--- a/setup.py
|
||||
+++ b/setup.py
|
||||
@@ -463,7 +463,14 @@
|
||||
"-DCMAKE_EXPORT_COMPILE_COMMANDS=ON", "-DLLVM_ENABLE_WERROR=ON",
|
||||
"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir, "-DTRITON_BUILD_PYTHON_MODULE=ON",
|
||||
"-DPython3_EXECUTABLE:FILEPATH=" + sys.executable, "-DPython3_INCLUDE_DIR=" + python_include_dir,
|
||||
- "-DTRITON_CODEGEN_BACKENDS=" + ';'.join([b.name for b in backends if not b.is_external]),
|
||||
+ # openRuyi: core Triton hard-depends on the NVGPU/NVWS dialects that
|
||||
+ # live under third_party/nvidia (TritonGPUTransforms and
|
||||
+ # TritonInstrumentToLLVM include their TableGen output and link
|
||||
+ # NVGPUIR/NVWSIR), so the nvidia backend must stay in the CMake
|
||||
+ # build even though its Python side is not packaged (see the
|
||||
+ # `backends` list below).
|
||||
+ "-DTRITON_CODEGEN_BACKENDS=" +
|
||||
+ ';'.join([b.name for b in backends if not b.is_external] + ["nvidia"]),
|
||||
"-DTRITON_PLUGIN_DIRS=" + ';'.join([b.src_dir for b in backends if b.is_external]),
|
||||
"-DTRITON_WHEEL_DIR=" + wheeldir
|
||||
]
|
||||
@@ -534,6 +541,10 @@
|
||||
|
||||
|
||||
def download_and_copy_dependencies():
|
||||
+ # openRuyi: this package ships only the AMD/ROCm backend, so the NVIDIA
|
||||
+ # CUDA toolchain (ptxas, cuobjdump, ...) is neither needed nor downloaded.
|
||||
+ # Skipping this also keeps the build fully offline for the OBS sandbox.
|
||||
+ return
|
||||
nvidia_version_path = os.path.join(get_base_dir(), "cmake", "nvidia-toolchain-version.json")
|
||||
with open(nvidia_version_path, "r") as nvidia_version_file:
|
||||
# parse this json file to get the version of the nvidia toolchain
|
||||
@@ -619,7 +630,11 @@
|
||||
)
|
||||
|
||||
|
||||
-backends = [*BackendInstaller.copy(["nvidia", "amd"]), *BackendInstaller.copy_externals()]
|
||||
+# openRuyi: ship the AMD/ROCm backend only. The NVIDIA C++ libraries are
|
||||
+# still compiled into libtriton (core requires them; see the cmake_args note
|
||||
+# above), but the NVIDIA Python backend -- which would bundle ptxas and the
|
||||
+# proprietary libdevice.10.bc -- is intentionally not packaged.
|
||||
+backends = [*BackendInstaller.copy(["amd"]), *BackendInstaller.copy_externals()]
|
||||
|
||||
|
||||
def get_package_dirs():
|
||||
@@ -0,0 +1,21 @@
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index c9620e3..6c1fbb0 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -262,6 +262,16 @@ if(TRITON_BUILD_PYTHON_MODULE)
|
||||
LLVMPowerPCAsmParser
|
||||
LLVMPowerPCCodeGen
|
||||
)
|
||||
+ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
|
||||
+ # There is no LLVM RISC-V GPU target; Triton never emits host code on
|
||||
+ # riscv64. However llvm::InitializeAllTargets() (referenced from
|
||||
+ # llvm.cc) pulls in the X86 codegen symbols, so link them here to avoid
|
||||
+ # an "undefined symbol: LLVMInitializeX86Target" failure at import time.
|
||||
+ # The matching bundled LLVM must therefore be built with the X86 target.
|
||||
+ list(APPEND TRITON_LIBRARIES
|
||||
+ LLVMX86CodeGen
|
||||
+ LLVMX86AsmParser
|
||||
+ )
|
||||
else()
|
||||
message(FATAL_ERROR "LLVM codegen/ASM parser libs: This HW architecture (${CMAKE_SYSTEM_PROCESSOR}) is not configured in cmake lib dependencies.")
|
||||
endif()
|
||||
@@ -0,0 +1,25 @@
|
||||
diff --git a/setup.py b/setup.py
|
||||
index fe4a78d..400c34f 100644
|
||||
--- a/setup.py
|
||||
+++ b/setup.py
|
||||
@@ -424,10 +424,19 @@ class CMakeBuild(build_ext):
|
||||
def get_pybind11_cmake_args(self):
|
||||
pybind11_sys_path = get_env_with_keys(["PYBIND11_SYSPATH"])
|
||||
if pybind11_sys_path:
|
||||
+ # openRuyi: distro pybind11 packages install the headers and the
|
||||
+ # CMake config under a filesystem prefix (include/pybind11 and
|
||||
+ # share/cmake/pybind11 below /usr), not inside the Python package
|
||||
+ # the way pip wheels do, so pybind11.get_cmake_dir() raises
|
||||
+ # ImportError ("pybind11 not installed"). When PYBIND11_SYSPATH
|
||||
+ # is given, derive the CMake dir from it as well instead of only
|
||||
+ # the include dir.
|
||||
pybind11_include_dir = os.path.join(pybind11_sys_path, "include")
|
||||
+ pybind11_cmake_dir = os.path.join(pybind11_sys_path, "share", "cmake", "pybind11")
|
||||
else:
|
||||
pybind11_include_dir = pybind11.get_include()
|
||||
- return [f"-Dpybind11_INCLUDE_DIR='{pybind11_include_dir}'", f"-Dpybind11_DIR='{pybind11.get_cmake_dir()}'"]
|
||||
+ pybind11_cmake_dir = pybind11.get_cmake_dir()
|
||||
+ return [f"-Dpybind11_INCLUDE_DIR='{pybind11_include_dir}'", f"-Dpybind11_DIR='{pybind11_cmake_dir}'"]
|
||||
|
||||
def get_proton_cmake_args(self):
|
||||
cmake_args = get_thirdparty_packages([get_json_package_info()])
|
||||
@@ -0,0 +1,210 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
#
|
||||
# Originally extracted from Fedora Project
|
||||
# Authors: The Fedora Project Contributors
|
||||
# riscv64 build hints contributed by the openRuyi AI working group.
|
||||
|
||||
%global srcname triton
|
||||
|
||||
# Triton pins an exact, in-development LLVM *commit* (not a release version).
|
||||
# It calls unstable MLIR/LLVM C++ internals, so it only builds against that one
|
||||
# revision; no released distro LLVM (nor ROCm's bundled LLVM) matches it, and it
|
||||
# additionally needs MLIR and LLD. We therefore build LLVM from source at the
|
||||
# pinned commit and link it statically into the Triton extension, exactly like
|
||||
# upstream's CI does.
|
||||
#
|
||||
# !!! WHEN BUMPING %%{version} !!!
|
||||
# Triton and LLVM must move together. Set %%{llvm_commit} to the value of
|
||||
# cmake/llvm-hash.txt for the new Triton tag and refresh Source1's sha256. A
|
||||
# mismatched LLVM will fail to compile or crash at runtime.
|
||||
%global llvm_commit f6ded0be897e2878612dd903f7e8bb85448269e5
|
||||
|
||||
# Build everything (the bundled LLVM and the Triton extension) with clang,
|
||||
# matching the rest of the openRuyi ROCm stack.
|
||||
%global toolchain clang
|
||||
|
||||
# The bundled static LLVM is large; drop LTO and skip the dwz pass which can
|
||||
# exhaust memory on the giant libtriton.so.
|
||||
%global _lto_cflags %{nil}
|
||||
%define _find_debuginfo_dwz_opts %{nil}
|
||||
|
||||
Name: python-%{srcname}
|
||||
Version: 3.6.0
|
||||
Release: %autorelease
|
||||
Summary: A language and compiler for custom Deep Learning operations
|
||||
# Triton itself is MIT. The statically bundled LLVM/MLIR/LLD is
|
||||
# "Apache-2.0 WITH LLVM-exception OR NCSA"; pybind11 headers are BSD-3-Clause.
|
||||
License: MIT AND (Apache-2.0 WITH LLVM-exception OR NCSA) AND BSD-3-Clause
|
||||
URL: https://github.com/triton-lang/triton
|
||||
|
||||
# Triton's PyPI sdist does not ship the C++ / third_party sources needed to
|
||||
# build, so the source is taken from the GitHub release tag instead.
|
||||
#!RemoteAsset: sha256:be270ed11ca5a8fbd9d7941c5bbe9a23a9f6e2ffd372c8398346928bee464774
|
||||
Source0: %{url}/archive/refs/tags/v%{version}.tar.gz#/%{srcname}-%{version}.tar.gz
|
||||
# NOTE: codeload generates llvm-project's commit archive on the fly; the
|
||||
# github.com/.../archive redirect to it times out behind the build proxy, so
|
||||
# point straight at codeload (identical bytes, same sha256). NOTE(review): the Source1 URL below still uses github.com/.../archive; confirm which is intended.
|
||||
#!RemoteAsset: sha256:f63c624aa63eda73508b9df2be2a6945ea4fddbee58615fbe1cd747b6884dd5e
|
||||
Source1: https://github.com/llvm/llvm-project/archive/%{llvm_commit}.tar.gz
|
||||
|
||||
# Ship only the AMD/ROCm Python backend and never reach out to the network
|
||||
# for the NVIDIA CUDA toolchain (ptxas, libdevice, ...). The NVIDIA C++
|
||||
# libraries are still compiled into libtriton: Triton core hard-depends on
|
||||
# the NVGPU/NVWS dialects living under third_party/nvidia.
|
||||
Patch0: 0001-Ship-only-the-AMD-ROCm-backend-offline.patch
|
||||
# Link the X86 codegen libraries on the riscv64 host so that
|
||||
# llvm::InitializeAllTargets() resolves at import time.
|
||||
Patch1: 0002-Add-riscv64-host-codegen-libraries.patch
|
||||
# pybind11.get_cmake_dir() only knows the pip-wheel layout and raises
|
||||
# ImportError with a distro python3-pybind11, so let PYBIND11_SYSPATH (set in
|
||||
# %%build) supply the CMake dir as well. The unconditional call came in with
|
||||
# https://github.com/triton-lang/triton/pull/4450
|
||||
Patch2: 0003-Use-PYBIND11_SYSPATH-for-the-pybind11-CMake-dir-too.patch
|
||||
BuildSystem: pyproject
|
||||
|
||||
BuildOption(install): %{srcname}
|
||||
|
||||
# --- Python build backend --------------------------------------------------
|
||||
BuildRequires: pyproject-rpm-macros
|
||||
BuildRequires: pkgconfig(python3)
|
||||
BuildRequires: python3dist(pip)
|
||||
BuildRequires: python3dist(setuptools)
|
||||
BuildRequires: python3dist(wheel)
|
||||
BuildRequires: python3dist(pybind11)
|
||||
# Supplies %%{_includedir}/pybind11 and %%{_datadir}/cmake/pybind11, which
|
||||
# PYBIND11_SYSPATH points the build at (see Patch2).
|
||||
BuildRequires: pkgconfig(pybind11)
|
||||
|
||||
# --- Toolchain for the bundled LLVM and the Triton extension ---------------
|
||||
BuildRequires: clang
|
||||
BuildRequires: lld
|
||||
BuildRequires: libstdc++-devel
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: cmake
|
||||
BuildRequires: ninja
|
||||
|
||||
# --- Libraries the bundled LLVM links against ------------------------------
|
||||
BuildRequires: pkgconfig(libffi)
|
||||
BuildRequires: pkgconfig(libxml-2.0)
|
||||
BuildRequires: pkgconfig(zlib)
|
||||
BuildRequires: pkgconfig(libzstd)
|
||||
|
||||
# --- Runtime ROCm stack ----------------------------------------------------
|
||||
# Triton JIT-compiles kernels at runtime: the GPU path uses the statically
|
||||
# linked LLD + AMDGPU code generator, but the per-kernel CPU launcher shim is
|
||||
# compiled on the fly (triton/runtime/build.py) with a host C compiler against
|
||||
# the Python headers, and the HIP runtime is dlopen'd.
|
||||
Requires: gcc
|
||||
Requires: pkgconfig(python3)
|
||||
Requires: cmake(hip)
|
||||
Requires: rocm-device-libs
|
||||
|
||||
Provides: python3-%{srcname} = %{version}-%{release}
|
||||
Provides: python3-%{srcname}%{?_isa} = %{version}-%{release}
|
||||
%python_provide python3-%{srcname}
|
||||
|
||||
%description
|
||||
Triton is a language and compiler for writing highly efficient custom
|
||||
Deep-Learning primitives. The aim of Triton is to provide an open-source
|
||||
environment to write fast code at higher productivity than CUDA, but also
|
||||
with higher flexibility than other existing DSLs.
|
||||
|
||||
This build ships the AMD ROCm (HIP) backend.
|
||||
|
||||
%prep -a
|
||||
# Unpack the pinned LLVM next to the Triton tree (built in %%build).
|
||||
tar -xf %{SOURCE1}
|
||||
|
||||
# Drop any pre-generated metadata shipped in the tarball.
|
||||
rm -rf %{srcname}.egg-info
|
||||
|
||||
# Triton's CMake turns warnings into errors; a from-source LLVM occasionally
|
||||
# emits new warnings, so relax this for both Triton and the embedded
|
||||
# add_llvm/add_mlir targets.
|
||||
sed -i -e 's@ -Werror @ @' CMakeLists.txt
|
||||
|
||||
# The wheel is built with --no-build-isolation, so cmake/ninja/pybind11 are
|
||||
# supplied as system BuildRequires. Strip them from build-system.requires so
|
||||
# %%pyproject_buildrequires does not emit unsatisfiable python3dist(cmake<4),
|
||||
# python3dist(ninja) dependencies.
|
||||
sed -i -e 's@^requires = .*@requires = ["setuptools>=40.8.0", "wheel"]@' pyproject.toml
|
||||
|
||||
%generate_buildrequires
|
||||
%pyproject_buildrequires
|
||||
|
||||
# Build the pinned LLVM+MLIR+LLD first, then let the pyproject build system
|
||||
# compile the Triton wheel against it. Both run in the same shell, so the
|
||||
# environment exported here reaches %%pyproject_wheel.
|
||||
%build -p
|
||||
llvm_src="$(pwd)/llvm-project-%{llvm_commit}"
|
||||
llvm_install="$(pwd)/llvm-install"
|
||||
|
||||
# Cap parallelism by available memory: LLVM/MLIR compile units and the final
|
||||
# Triton link are memory hungry and will thrash or OOM otherwise.
|
||||
mem_gb=$(awk '/MemTotal/ {print int($2/1024/1024)}' /proc/meminfo)
|
||||
compile_jobs=$(nproc)
|
||||
mem_jobs=$(( 1 + mem_gb / 2 ))
|
||||
[ "$mem_jobs" -lt "$compile_jobs" ] && compile_jobs=$mem_jobs
|
||||
[ "$compile_jobs" -lt 1 ] && compile_jobs=1
|
||||
# Linking the static archives needs far more memory per job.
|
||||
link_jobs=$(( 1 + mem_gb / 16 ))
|
||||
[ "$link_jobs" -lt 1 ] && link_jobs=1
|
||||
|
||||
%ifarch x86_64
|
||||
llvm_targets="X86;AMDGPU;NVPTX"
|
||||
%endif
|
||||
%ifarch riscv64
|
||||
# X86 is required by the riscv64 codegen-libs patch; AMDGPU drives the ROCm
|
||||
# backend; NVPTX is always linked by Triton's core; RISCV is the host.
|
||||
llvm_targets="RISCV;X86;AMDGPU;NVPTX"
|
||||
%endif
|
||||
|
||||
cmake -S "$llvm_src/llvm" -B "$llvm_src/build" -G Ninja \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX="$llvm_install" \
|
||||
-DCMAKE_C_COMPILER=clang \
|
||||
-DCMAKE_CXX_COMPILER=clang++ \
|
||||
-DLLVM_USE_LINKER=lld \
|
||||
-DLLVM_ENABLE_PROJECTS="mlir;lld" \
|
||||
-DLLVM_TARGETS_TO_BUILD="$llvm_targets" \
|
||||
-DLLVM_ENABLE_ASSERTIONS=OFF \
|
||||
-DBUILD_SHARED_LIBS=OFF \
|
||||
-DLLVM_BUILD_LLVM_DYLIB=OFF \
|
||||
-DLLVM_INSTALL_UTILS=ON \
|
||||
-DLLVM_ENABLE_TERMINFO=OFF \
|
||||
-DLLVM_ENABLE_ZSTD=ON \
|
||||
-DLLVM_INCLUDE_BENCHMARKS=OFF \
|
||||
-DLLVM_INCLUDE_EXAMPLES=OFF \
|
||||
-DLLVM_INCLUDE_TESTS=OFF \
|
||||
-DLLVM_PARALLEL_COMPILE_JOBS=$compile_jobs \
|
||||
-DLLVM_PARALLEL_LINK_JOBS=$link_jobs
|
||||
cmake --build "$llvm_src/build" --target install -- -j$compile_jobs
|
||||
|
||||
# Point Triton at the freshly built LLVM and keep the build offline + ROCm-only.
|
||||
export LLVM_SYSPATH="$llvm_install"
|
||||
export PATH="$llvm_install/bin:$PATH"
|
||||
# System pybind11 from pybind11-devel: headers and CMake config under
|
||||
# %%{_prefix} (see Patch2).
|
||||
export PYBIND11_SYSPATH=%{_prefix}
|
||||
export CC=clang
|
||||
export CXX=clang++
|
||||
export MAX_JOBS=$compile_jobs
|
||||
export TRITON_PARALLEL_LINK_JOBS=$link_jobs
|
||||
export TRITON_BUILD_WITH_CLANG_LLD=ON
|
||||
export TRITON_BUILD_WITH_CCACHE=OFF
|
||||
# Proton needs CUPTI/roctracer/json; not needed for a plain ROCm backend.
|
||||
export TRITON_BUILD_PROTON=OFF
|
||||
# Don't fetch googletest, and don't trip over new LLVM warnings.
|
||||
export TRITON_APPEND_CMAKE_ARGS="-DTRITON_BUILD_UT=OFF -DLLVM_ENABLE_WERROR=OFF"
|
||||
|
||||
%files -f %{pyproject_files}
|
||||
# Triton's own LICENSE is captured from the wheel metadata by
|
||||
# %%pyproject_save_files; only the bundled LLVM license must be added by hand.
|
||||
%license llvm-project-%{llvm_commit}/llvm/LICENSE.TXT
|
||||
%doc README.md
|
||||
|
||||
%changelog
|
||||
%autochangelog
|
||||
+61
-193
@@ -1,109 +1,60 @@
|
||||
%global upstreamname RCCL
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
%bcond compat 0
|
||||
%if %{with compat}
|
||||
%global pkg_libdir lib
|
||||
%global pkg_prefix %{_prefix}/lib64/rocm/rocm-%{rocm_release}/
|
||||
%global pkg_suffix -%{rocm_release}
|
||||
%else
|
||||
%global pkg_libdir %{_lib}
|
||||
%global pkg_prefix %{_prefix}
|
||||
%global pkg_suffix %{nil}
|
||||
%endif
|
||||
%global rccl_name rccl%{pkg_suffix}
|
||||
# rocm stack builds with clang
|
||||
%global toolchain clang
|
||||
|
||||
%global toolchain rocm
|
||||
# hipcc does not support some clang flags
|
||||
%global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host -fcf-protection/' -e 's/-flto=thin//' -e 's/-mtls-dialect=gnu2//')
|
||||
|
||||
%global _lto_cflags %{nil}
|
||||
|
||||
%bcond debug 0
|
||||
%if %{with debug}
|
||||
%global build_type DEBUG
|
||||
%else
|
||||
%global build_type RelWithDebInfo
|
||||
%endif
|
||||
|
||||
# downloads tests, use mock --enable-network
|
||||
%bcond test 0
|
||||
%if %{with test}
|
||||
%global build_test ON
|
||||
%global __brp_check_rpaths %{nil}
|
||||
%else
|
||||
%global build_test OFF
|
||||
%endif
|
||||
|
||||
%bcond export 0
|
||||
%if %{with export}
|
||||
%global build_compile_db ON
|
||||
%else
|
||||
%global build_compile_db OFF
|
||||
%endif
|
||||
|
||||
# rccl is not supported on gfx1103
|
||||
# On 6.1.1
|
||||
# lld: error: ld-temp.o <inline asm>:1:25: specified hardware register is not supported on this GPU
|
||||
# s_getreg_b32 s1, hwreg(HW_REG_HW_ID)
|
||||
#
|
||||
# On 6.2
|
||||
# Problems reported with gfx10, removing gfx10 and default (gfx10 and gfx11) from build list
|
||||
#
|
||||
%global gpu_list %{rocm_gpu_list_rccl}
|
||||
%global _gpu_list gfx1100
|
||||
|
||||
Name: %{rccl_name}
|
||||
Name: rccl
|
||||
Version: %{rocm_version}
|
||||
Release: 1%{?dist}
|
||||
Release: %autorelease
|
||||
Summary: ROCm Communication Collectives Library
|
||||
|
||||
Url: https://github.com/ROCm/rccl
|
||||
License: BSD-3-Clause AND MIT AND Apache-2.0
|
||||
# From License.txt the main license is BSD 3
|
||||
# Modifications from Microsoft are MIT
|
||||
# The NVIDIA based header files below are Apache-2.0
|
||||
# src/include/nvtx3/nv*.h and similar
|
||||
# The URL for NVIDIA in the License.txt https://github.com/NVIDIA/NVTX is Apache-2.0
|
||||
Url: https://github.com/ROCm/rccl
|
||||
#!RemoteAsset: sha256:eaa60bcf62feb3198553f2bcf6dcbfdfcecd0fdfabda41f1dae7d3f15fadbd68
|
||||
Source0: %{url}/archive/rocm-%{rocm_version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
Source0: %{url}/archive/rocm-%{rocm_version}.tar.gz#/%{upstreamname}-%{rocm_version}.tar.gz
|
||||
BuildOption(conf): -G Ninja
|
||||
BuildOption(conf): -DGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
BuildOption(conf): -DEXPLICIT_ROCM_VERSION=%{rocm_version}
|
||||
BuildOption(conf): -DROCM_PATH=%{_prefix}
|
||||
BuildOption(conf): -DCMAKE_VERBOSE_MAKEFILE=ON
|
||||
BuildOption(conf): -DBUILD_TESTS=ON
|
||||
|
||||
BuildRequires: llvm
|
||||
BuildRequires: llvm-devel
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-devel
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: clang-tools-extra-devel
|
||||
BuildRequires: lld
|
||||
BuildRequires: lld-devel
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: rocm-device-libs
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: hipify%{pkg_suffix}
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: fmt-devel
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(fmt)
|
||||
BuildRequires: cmake(GTest)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(rocm_smi)
|
||||
BuildRequires: cmake(rocm-core)
|
||||
BuildRequires: cmake(rocprofiler-register)
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: hipify
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: ninja
|
||||
BuildRequires: python3
|
||||
BuildRequires: rocm-cmake%{pkg_suffix}
|
||||
BuildRequires: rocm-comgr%{pkg_suffix}-devel
|
||||
BuildRequires: rocm-llvm%{pkg_suffix}-macros
|
||||
BuildRequires: rocm-core%{pkg_suffix}-devel
|
||||
BuildRequires: rocm-hip%{pkg_suffix}-devel
|
||||
BuildRequires: rocr-runtime%{pkg_suffix}-devel
|
||||
#BuildRequires: rocm-rpm-macros%{pkg_suffix}
|
||||
BuildRequires: rocm-smi%{pkg_suffix}-devel
|
||||
|
||||
%if %{with test}
|
||||
BuildRequires: gtest-devel
|
||||
%endif
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-llvm-macros
|
||||
|
||||
Requires: %{name}-data = %{version}-%{release}
|
||||
Provides: rccl%{pkg_suffix} = %{version}-%{release}
|
||||
|
||||
ExclusiveArch: x86_64 riscv64
|
||||
|
||||
%description
|
||||
RCCL (pronounced "Rickle") is a stand-alone library of standard
|
||||
@@ -122,144 +73,61 @@ algorithms and have been optimized for throughput and latency. For
|
||||
best performance, small operations can be either batched into
|
||||
larger operations or aggregated through the API.
|
||||
|
||||
%post -p /sbin/ldconfig
|
||||
%postun -p /sbin/ldconfig
|
||||
|
||||
%package devel
|
||||
%package devel
|
||||
Summary: Headers and libraries for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
Provides: rccl%{pkg_suffix}-devel = %{version}-%{release}
|
||||
Provides: rccl-devel = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
%description devel
|
||||
Headers and libraries for %{name}
|
||||
|
||||
%package data
|
||||
%package data
|
||||
Summary: Data for %{name}
|
||||
BuildArch: noarch
|
||||
|
||||
%description data
|
||||
%description data
|
||||
Data for %{name}
|
||||
|
||||
%if %{with test}
|
||||
%package test
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%prep
|
||||
%autosetup -p1 -n rccl-rocm-%{version}
|
||||
|
||||
# Allow user to set AMDGPU_TARGETS
|
||||
sed -i -e '/AMD GPU targets to compile for/d' CMakeLists.txt
|
||||
|
||||
# No /opt/rocm/.info/version
|
||||
sed -i -e 's@cat ${ROCM_PATH}/.info/version@echo %{rocm_version}@' CMakeLists.txt
|
||||
|
||||
# wrong path
|
||||
# https://github.com/ROCm/rccl/issues/1649
|
||||
sed -i -e 's@rocm-core/rocm_version.h@rocm_version.h@' src/include/hip_rocm_version_info.h
|
||||
|
||||
# Problems building on SUSE
|
||||
# ENABLE_MSCCLPP=OFF
|
||||
sed -i -e 's@if (ENABLE_MSCCLPP AND NOT(${HOST_OS_ID} STREQUAL "ubuntu" OR ${HOST_OS_ID} STREQUAL "centos"))@if (ENABLE_MSCCLPP)@' CMakeLists.txt
|
||||
|
||||
# Building --with test
|
||||
# .../test/common/TestBed.cpp:607:16: error: no member named 'setfill' in namespace 'std'
|
||||
# 607 | ss << std::setfill(' ') << std::setw(20) << ncclFuncNames[funcType] << " ";
|
||||
# https://github.com/ROCm/rccl/issues/1749
|
||||
sed -i '/#include <map.*/a#include <iomanip>' test/common/TestBed.hpp
|
||||
|
||||
# On Tumbleweed Q3,2025
|
||||
# /usr/include/gtest/internal/gtest-port.h:273:2: error: C++ versions less than C++17 are not supported.
|
||||
# Convert the c++14 to c++17
|
||||
sed -i -e 's@set(CMAKE_CXX_STANDARD 14)@set(CMAKE_CXX_STANDARD 17)@' CMakeLists.txt
|
||||
|
||||
%prep -a
|
||||
# Do not force install
|
||||
sed -i -e 's@set(CMAKE_INSTALL_LIBDIR@#set(CMAKE_INSTALL_LIBDIR@' cmake/Dependencies.cmake
|
||||
|
||||
# RCCL uses -parallel-jobs for both compiling and linking
|
||||
# compiling is set to 12, which may be more than the cores on the build machine.
|
||||
# linking is set by reserving 16GB per thread, can be too little.
|
||||
# Use our own heuristics here
|
||||
# Real cores, No hyperthreading
|
||||
COMPILE_JOBS=`cat /proc/cpuinfo | grep -m 1 'cpu cores' | awk '{ print $4 }'`
|
||||
if [ ${COMPILE_JOBS}x = x ]; then
|
||||
COMPILE_JOBS=1
|
||||
fi
|
||||
# Try again..
|
||||
if [ ${COMPILE_JOBS} = 1 ]; then
|
||||
COMPILE_JOBS=`lscpu | grep '^CPU(s)' | awk '{ print $2 }'`
|
||||
if [ ${COMPILE_JOBS}x = x ]; then
|
||||
COMPILE_JOBS=4
|
||||
fi
|
||||
fi
|
||||
|
||||
# Take into account memory usage per core, do not thrash real memory
|
||||
# inflate this to prevent competing with normal compile jobs
|
||||
BUILD_MEM=16
|
||||
MEM_KB=0
|
||||
MEM_KB=`cat /proc/meminfo | grep MemTotal | awk '{ print $2 }'`
|
||||
MEM_MB=`eval "expr ${MEM_KB} / 1024"`
|
||||
MEM_GB=`eval "expr ${MEM_MB} / 1024"`
|
||||
COMPILE_JOBS_MEM=`eval "expr 1 + ${MEM_GB} / ${BUILD_MEM}"`
|
||||
if [ "$COMPILE_JOBS_MEM" -lt "$COMPILE_JOBS" ]; then
|
||||
COMPILE_JOBS=$COMPILE_JOBS_MEM
|
||||
fi
|
||||
LINK_MEM=65
|
||||
LINK_JOBS=`eval "expr 1 + ${MEM_GB} / ${LINK_MEM}"`
|
||||
|
||||
sed -i -e "s@rccl PRIVATE -parallel-jobs=12@rccl PRIVATE -parallel-jobs=${COMPILE_JOBS}@" CMakeLists.txt
|
||||
sed -i -e "s@-parallel-jobs=\${num_linker_jobs}@-parallel-jobs=${LINK_JOBS}@" CMakeLists.txt
|
||||
# -amdgpu-s-branch-bits and -amdgpu-long-branch-factor=2 are needed to avoid 'branch size exceed simm16' error
|
||||
# --lto-partitions to accelerate linking time
|
||||
sed -i -e 's@target_link_options(rccl PRIVATE "SHELL:-Xoffload-linker -mllvm=-amdgpu-kernarg-preload-count=16")@target_link_options(rccl PRIVATE "SHELL:-Xoffload-linker -mllvm=-amdgpu-s-branch-bits=15" "SHELL:-Xoffload-linker -mllvm=-amdgpu-long-branch-factor=2" "SHELL:-Xoffload-linker -mllvm=-amdgpu-kernarg-preload-count=16" "SHELL:-Xoffload-linker --lto-partitions=%(nproc)" "SHELL:-Xoffload-linker --verbose")@' CMakeLists.txt
|
||||
|
||||
%build
|
||||
%cmake \
|
||||
-DGPU_TARGETS=%{gpu_list} \
|
||||
-DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
|
||||
-DBUILD_TESTS=%{build_test} \
|
||||
-DCMAKE_BUILD_TYPE=%{build_type} \
|
||||
-DCMAKE_C_COMPILER=%rocmllvm_bindir/clang \
|
||||
-DCMAKE_CXX_COMPILER=%rocmllvm_bindir/clang++ \
|
||||
-DCMAKE_EXPORT_COMPILE_COMMANDS=%{build_compile_db} \
|
||||
-DCMAKE_INSTALL_LIBDIR=%{pkg_libdir} \
|
||||
-DCMAKE_INSTALL_PREFIX=%{pkg_prefix} \
|
||||
-DCMAKE_SKIP_RPATH=ON \
|
||||
-DENABLE_MSCCLPP=OFF \
|
||||
-DEXPLICIT_ROCM_VERSION=%{rocm_version} \
|
||||
-DHIP_PLATFORM=amd \
|
||||
-DRCCL_ROCPROFILER_REGISTER=OFF \
|
||||
-DROCM_PATH=%{pkg_prefix} \
|
||||
-DROCM_SYMLINK_LIBS=OFF
|
||||
|
||||
# AMDGPU device linker runs as a process that produces no stdout for about 8~12 hours on riscv64
|
||||
timeout 12h bash -c 'while sleep 300; do echo "[heartbeat] $(date)"; done' & TIME_OUT=$!
|
||||
%cmake_build
|
||||
kill $TIME_OUT 2>/dev/null || true
|
||||
|
||||
%install
|
||||
%cmake_install
|
||||
|
||||
rm -f %{buildroot}%{pkg_prefix}/share/doc/rccl/LICENSE.txt
|
||||
%install -a
|
||||
rm -f %{buildroot}%{_datadir}/doc/rccl/LICENSE.txt
|
||||
|
||||
%files
|
||||
%license LICENSE.txt
|
||||
%{pkg_prefix}/%{pkg_libdir}/librccl.so.*
|
||||
%{pkg_prefix}/bin/rcclras
|
||||
%{_libdir}/librccl.so.*
|
||||
%{_bindir}/rcclras
|
||||
|
||||
%files data
|
||||
%{pkg_prefix}/share/rccl/msccl-algorithms/
|
||||
%{pkg_prefix}/share/rccl/msccl-unit-test-algorithms/
|
||||
%{_datadir}/rccl/msccl-algorithms/
|
||||
%{_datadir}/rccl/msccl-unit-test-algorithms/
|
||||
|
||||
%files devel
|
||||
%doc README.md
|
||||
%{pkg_prefix}/include/rccl/
|
||||
%{pkg_prefix}/%{pkg_libdir}/cmake/rccl/
|
||||
%{pkg_prefix}/%{pkg_libdir}/librccl.so
|
||||
%{_includedir}/rccl/
|
||||
%{_libdir}/cmake/rccl/
|
||||
%{_libdir}/librccl.so
|
||||
|
||||
%if %{with test}
|
||||
%files test
|
||||
%{pkg_prefix}/bin/rccl-UnitTests
|
||||
%endif
|
||||
%{_bindir}/rccl-UnitTests
|
||||
|
||||
%changelog
|
||||
* Mon Jan 26 2026 Yifan Xu <xuyifan@iscas.ac.cn> - 7.1.1-1
|
||||
- Import from upstream
|
||||
%autochangelog
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
From 87f3c0b3ebab78aa6d126633683720031d886313 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <trix@redhat.com>
|
||||
Date: Sat, 13 Jan 2024 14:36:01 -0500
|
||||
Subject: [PATCH] fixup install of tensile output
|
||||
|
||||
---
|
||||
library/src/CMakeLists.txt | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/library/src/CMakeLists.txt b/library/src/CMakeLists.txt
|
||||
index f4bdfb5f9742..316779134314 100644
|
||||
--- a/library/src/CMakeLists.txt
|
||||
+++ b/library/src/CMakeLists.txt
|
||||
@@ -823,7 +823,7 @@ if( BUILD_WITH_TENSILE )
|
||||
if (WIN32)
|
||||
set( ROCBLAS_TENSILE_LIBRARY_DIR "\${CPACK_PACKAGING_INSTALL_PREFIX}/bin/rocblas" CACHE PATH "path to tensile library" )
|
||||
else()
|
||||
- set( ROCBLAS_TENSILE_LIBRARY_DIR "\${CPACK_PACKAGING_INSTALL_PREFIX}${CMAKE_INSTALL_LIBDIR}/rocblas" CACHE PATH "path to tensile library" )
|
||||
+ set( ROCBLAS_TENSILE_LIBRARY_DIR "${CMAKE_INSTALL_LIBDIR}/rocblas" CACHE PATH "path to tensile library" )
|
||||
endif()
|
||||
# For ASAN package, Tensile library files(which are not shared libraries) are not required
|
||||
if( NOT ENABLE_ASAN_PACKAGING )
|
||||
--
|
||||
2.51.0
|
||||
|
||||
@@ -1,314 +0,0 @@
|
||||
diff --git a/clients/include/blas2/testing_gbmv.hpp b/clients/include/blas2/testing_gbmv.hpp
|
||||
index d02e1a5..bfe1046 100644
|
||||
--- a/clients/include/blas2/testing_gbmv.hpp
|
||||
+++ b/clients/include/blas2/testing_gbmv.hpp
|
||||
@@ -267,11 +267,11 @@ void testing_gbmv(const Arguments& arg)
|
||||
hy_gold = hy;
|
||||
|
||||
// copy data from CPU to device
|
||||
- dAb.transfer_from(hAb);
|
||||
- dx.transfer_from(hx);
|
||||
- dy.transfer_from(hy);
|
||||
- d_alpha.transfer_from(halpha);
|
||||
- d_beta.transfer_from(hbeta);
|
||||
+ CHECK_HIP_ERROR(dAb.transfer_from(hAb));
|
||||
+ CHECK_HIP_ERROR(dx.transfer_from(hx));
|
||||
+ CHECK_HIP_ERROR(dy.transfer_from(hy));
|
||||
+ CHECK_HIP_ERROR(d_alpha.transfer_from(halpha));
|
||||
+ CHECK_HIP_ERROR(d_beta.transfer_from(hbeta));
|
||||
|
||||
double cpu_time_used;
|
||||
double error_host = 0.0, error_device = 0.0;
|
||||
@@ -290,12 +290,12 @@ void testing_gbmv(const Arguments& arg)
|
||||
(handle, transA, M, N, KL, KU, &h_alpha, dAb, lda, dx, incx, &h_beta, dy, incy));
|
||||
handle.post_test(arg);
|
||||
|
||||
- hy.transfer_from(dy);
|
||||
+ CHECK_HIP_ERROR(hy.transfer_from(dy));
|
||||
}
|
||||
|
||||
if(arg.pointer_mode_device)
|
||||
{
|
||||
- dy.transfer_from(hy_gold);
|
||||
+ CHECK_HIP_ERROR(dy.transfer_from(hy_gold));
|
||||
|
||||
CHECK_ROCBLAS_ERROR(rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device));
|
||||
handle.pre_test(arg);
|
||||
@@ -308,7 +308,7 @@ void testing_gbmv(const Arguments& arg)
|
||||
{
|
||||
HOST_MEMCHECK(host_vector<T>, hy_copy, (dim_y, incy));
|
||||
// copy output from device to CPU
|
||||
- hy.transfer_from(dy);
|
||||
+ CHECK_HIP_ERROR(hy.transfer_from(dy));
|
||||
|
||||
// multi-GPU support
|
||||
int device_id, device_count;
|
||||
@@ -330,17 +330,17 @@ void testing_gbmv(const Arguments& arg)
|
||||
DEVICE_MEMCHECK(device_vector<T>, d_alpha_copy, (1));
|
||||
DEVICE_MEMCHECK(device_vector<T>, d_beta_copy, (1));
|
||||
|
||||
- dAb_copy.transfer_from(hAb);
|
||||
- dx_copy.transfer_from(hx);
|
||||
- d_alpha_copy.transfer_from(halpha);
|
||||
- d_beta_copy.transfer_from(hbeta);
|
||||
+ CHECK_HIP_ERROR(dAb_copy.transfer_from(hAb));
|
||||
+ CHECK_HIP_ERROR(dx_copy.transfer_from(hx));
|
||||
+ CHECK_HIP_ERROR(d_alpha_copy.transfer_from(halpha));
|
||||
+ CHECK_HIP_ERROR(d_beta_copy.transfer_from(hbeta));
|
||||
|
||||
CHECK_ROCBLAS_ERROR(
|
||||
rocblas_set_pointer_mode(handle_copy, rocblas_pointer_mode_device));
|
||||
|
||||
for(int runs = 0; runs < arg.iters; runs++)
|
||||
{
|
||||
- dy_copy.transfer_from(hy_gold);
|
||||
+ CHECK_HIP_ERROR(dy_copy.transfer_from(hy_gold));
|
||||
DAPI_CHECK(rocblas_gbmv_fn,
|
||||
(handle_copy,
|
||||
transA,
|
||||
@@ -357,7 +357,7 @@ void testing_gbmv(const Arguments& arg)
|
||||
dy_copy,
|
||||
incy));
|
||||
// copy output from device to CPU
|
||||
- hy_copy.transfer_from(dy_copy);
|
||||
+ CHECK_HIP_ERROR(hy_copy.transfer_from(dy_copy));
|
||||
unit_check_general<T>(1, dim_y, incy, hy, hy_copy);
|
||||
}
|
||||
}
|
||||
@@ -383,7 +383,7 @@ void testing_gbmv(const Arguments& arg)
|
||||
if(arg.pointer_mode_device)
|
||||
{
|
||||
// copy output from device to CPU
|
||||
- hy.transfer_from(dy);
|
||||
+ CHECK_HIP_ERROR(hy.transfer_from(dy));
|
||||
|
||||
if(arg.unit_check)
|
||||
{
|
||||
diff --git a/clients/include/blas2/testing_sbmv.hpp b/clients/include/blas2/testing_sbmv.hpp
|
||||
index feb1148..95e09e2 100644
|
||||
--- a/clients/include/blas2/testing_sbmv.hpp
|
||||
+++ b/clients/include/blas2/testing_sbmv.hpp
|
||||
@@ -204,9 +204,9 @@ void testing_sbmv(const Arguments& arg)
|
||||
hy_gold = hy;
|
||||
|
||||
// copy data from CPU to device
|
||||
- dx.transfer_from(hx);
|
||||
- dy.transfer_from(hy);
|
||||
- dAb.transfer_from(hAb);
|
||||
+ CHECK_HIP_ERROR(dx.transfer_from(hx));
|
||||
+ CHECK_HIP_ERROR(dy.transfer_from(hy));
|
||||
+ CHECK_HIP_ERROR(dAb.transfer_from(hAb));
|
||||
|
||||
double cpu_time_used;
|
||||
double error_host = 0.0, error_device = 0.0;
|
||||
@@ -231,7 +231,7 @@ void testing_sbmv(const Arguments& arg)
|
||||
CHECK_HIP_ERROR(d_alpha.transfer_from(alpha));
|
||||
CHECK_HIP_ERROR(d_beta.transfer_from(beta));
|
||||
|
||||
- dy.transfer_from(hy_gold);
|
||||
+ CHECK_HIP_ERROR(dy.transfer_from(hy_gold));
|
||||
|
||||
handle.pre_test(arg);
|
||||
|
||||
diff --git a/clients/include/blas2/testing_sbmv_batched.hpp b/clients/include/blas2/testing_sbmv_batched.hpp
|
||||
index 4812be9..9a91392 100644
|
||||
--- a/clients/include/blas2/testing_sbmv_batched.hpp
|
||||
+++ b/clients/include/blas2/testing_sbmv_batched.hpp
|
||||
@@ -322,9 +322,9 @@ void testing_sbmv_batched(const Arguments& arg)
|
||||
hy_gold.copy_from(hy);
|
||||
|
||||
// copy data from CPU to device
|
||||
- dx.transfer_from(hx);
|
||||
- dy.transfer_from(hy);
|
||||
- dAb.transfer_from(hAb);
|
||||
+ CHECK_HIP_ERROR(dx.transfer_from(hx));
|
||||
+ CHECK_HIP_ERROR(dy.transfer_from(hy));
|
||||
+ CHECK_HIP_ERROR(dAb.transfer_from(hAb));
|
||||
|
||||
double cpu_time_used;
|
||||
double h_error = 0.0, d_error = 0.0;
|
||||
@@ -363,7 +363,7 @@ void testing_sbmv_batched(const Arguments& arg)
|
||||
CHECK_HIP_ERROR(d_alpha.transfer_from(alpha));
|
||||
CHECK_HIP_ERROR(d_beta.transfer_from(beta));
|
||||
|
||||
- dy.transfer_from(hy_gold);
|
||||
+ CHECK_HIP_ERROR(dy.transfer_from(hy_gold));
|
||||
|
||||
handle.pre_test(arg);
|
||||
DAPI_CHECK(rocblas_sbmv_batched_fn,
|
||||
diff --git a/clients/include/blas2/testing_sbmv_strided_batched.hpp b/clients/include/blas2/testing_sbmv_strided_batched.hpp
|
||||
index a32538a..ad902a3 100644
|
||||
--- a/clients/include/blas2/testing_sbmv_strided_batched.hpp
|
||||
+++ b/clients/include/blas2/testing_sbmv_strided_batched.hpp
|
||||
@@ -385,9 +385,9 @@ void testing_sbmv_strided_batched(const Arguments& arg)
|
||||
hy_gold.copy_from(hy);
|
||||
|
||||
// copy data from CPU to device
|
||||
- dx.transfer_from(hx);
|
||||
- dy.transfer_from(hy);
|
||||
- dAb.transfer_from(hAb);
|
||||
+ CHECK_HIP_ERROR(dx.transfer_from(hx));
|
||||
+ CHECK_HIP_ERROR(dy.transfer_from(hy));
|
||||
+ CHECK_HIP_ERROR(dAb.transfer_from(hAb));
|
||||
|
||||
double cpu_time_used;
|
||||
double error_host = 0.0, error_device = 0.0;
|
||||
@@ -428,7 +428,7 @@ void testing_sbmv_strided_batched(const Arguments& arg)
|
||||
CHECK_HIP_ERROR(d_alpha.transfer_from(alpha));
|
||||
CHECK_HIP_ERROR(d_beta.transfer_from(beta));
|
||||
|
||||
- dy.transfer_from(hy_gold);
|
||||
+ CHECK_HIP_ERROR(dy.transfer_from(hy_gold));
|
||||
|
||||
handle.pre_test(arg);
|
||||
DAPI_CHECK(rocblas_sbmv_strided_batched_fn,
|
||||
diff --git a/clients/include/blas2/testing_symv.hpp b/clients/include/blas2/testing_symv.hpp
|
||||
index 2b31355..d170478 100644
|
||||
--- a/clients/include/blas2/testing_symv.hpp
|
||||
+++ b/clients/include/blas2/testing_symv.hpp
|
||||
@@ -213,7 +213,7 @@ void testing_symv(const Arguments& arg)
|
||||
CHECK_HIP_ERROR(d_alpha.transfer_from(alpha));
|
||||
CHECK_HIP_ERROR(d_beta.transfer_from(beta));
|
||||
|
||||
- dy.transfer_from(hy_gold);
|
||||
+ CHECK_HIP_ERROR(dy.transfer_from(hy_gold));
|
||||
|
||||
handle.pre_test(arg);
|
||||
DAPI_CHECK(rocblas_symv_fn,
|
||||
diff --git a/clients/include/blas2/testing_symv_batched.hpp b/clients/include/blas2/testing_symv_batched.hpp
|
||||
index 6fd3f7b..ceed6f3 100644
|
||||
--- a/clients/include/blas2/testing_symv_batched.hpp
|
||||
+++ b/clients/include/blas2/testing_symv_batched.hpp
|
||||
@@ -345,7 +345,7 @@ void testing_symv_batched(const Arguments& arg)
|
||||
CHECK_HIP_ERROR(d_alpha.transfer_from(alpha));
|
||||
CHECK_HIP_ERROR(d_beta.transfer_from(beta));
|
||||
|
||||
- dy.transfer_from(hy_gold);
|
||||
+ CHECK_HIP_ERROR(dy.transfer_from(hy_gold));
|
||||
|
||||
handle.pre_test(arg);
|
||||
DAPI_CHECK(rocblas_symv_batched_fn,
|
||||
diff --git a/clients/include/blas2/testing_symv_strided_batched.hpp b/clients/include/blas2/testing_symv_strided_batched.hpp
|
||||
index d96e17c..dbaa454 100644
|
||||
--- a/clients/include/blas2/testing_symv_strided_batched.hpp
|
||||
+++ b/clients/include/blas2/testing_symv_strided_batched.hpp
|
||||
@@ -433,7 +433,7 @@ void testing_symv_strided_batched(const Arguments& arg)
|
||||
CHECK_HIP_ERROR(d_alpha.transfer_from(alpha));
|
||||
CHECK_HIP_ERROR(d_beta.transfer_from(beta));
|
||||
|
||||
- dy.transfer_from(hy_gold);
|
||||
+ CHECK_HIP_ERROR(dy.transfer_from(hy_gold));
|
||||
|
||||
handle.pre_test(arg);
|
||||
DAPI_CHECK(rocblas_symv_strided_batched_fn,
|
||||
diff --git a/clients/include/blas_ex/testing_gemm_batched_ex.hpp b/clients/include/blas_ex/testing_gemm_batched_ex.hpp
|
||||
index 214f0b4..54ca0b5 100644
|
||||
--- a/clients/include/blas_ex/testing_gemm_batched_ex.hpp
|
||||
+++ b/clients/include/blas_ex/testing_gemm_batched_ex.hpp
|
||||
@@ -103,7 +103,7 @@ void testing_gemm_batched_ex_bad_arg(const Arguments& arg)
|
||||
rocblas_seedrand();
|
||||
rocblas_init_matrix<To>(
|
||||
hC, arg, rocblas_client_beta_sets_nan, rocblas_client_general_matrix);
|
||||
- dC.transfer_from(hC);
|
||||
+ CHECK_HIP_ERROR(dC.transfer_from(hC));
|
||||
|
||||
// clang-format off
|
||||
// check for invalid enum
|
||||
diff --git a/clients/include/blas_ex/testing_gemm_ex.hpp b/clients/include/blas_ex/testing_gemm_ex.hpp
|
||||
index 4977995..be36f2e 100644
|
||||
--- a/clients/include/blas_ex/testing_gemm_ex.hpp
|
||||
+++ b/clients/include/blas_ex/testing_gemm_ex.hpp
|
||||
@@ -102,7 +102,7 @@ void testing_gemm_ex_bad_arg(const Arguments& arg)
|
||||
HOST_MEMCHECK(host_matrix<To>, hC, (M, N, ldc));
|
||||
rocblas_seedrand();
|
||||
rocblas_init_matrix(hC, arg, rocblas_client_beta_sets_nan, rocblas_client_general_matrix);
|
||||
- dC.transfer_from(hC);
|
||||
+ CHECK_HIP_ERROR(dC.transfer_from(hC));
|
||||
|
||||
// clang-format off
|
||||
|
||||
diff --git a/clients/include/testing_set_get_matrix_async.hpp b/clients/include/testing_set_get_matrix_async.hpp
|
||||
index 01d0648..de8d301 100644
|
||||
--- a/clients/include/testing_set_get_matrix_async.hpp
|
||||
+++ b/clients/include/testing_set_get_matrix_async.hpp
|
||||
@@ -127,7 +127,7 @@ void testing_set_get_matrix_async(const Arguments& arg)
|
||||
|
||||
cpu_time_used = get_time_us_no_sync() - cpu_time_used;
|
||||
|
||||
- hipStreamSynchronize(stream);
|
||||
+ CHECK_HIP_ERROR(hipStreamSynchronize(stream));
|
||||
|
||||
if(arg.unit_check)
|
||||
{
|
||||
@@ -160,7 +160,7 @@ void testing_set_get_matrix_async(const Arguments& arg)
|
||||
(rows, cols, sizeof(T), dD, ldd, hB, ldb, stream));
|
||||
}
|
||||
|
||||
- hipStreamSynchronize(stream);
|
||||
+ CHECK_HIP_ERROR(hipStreamSynchronize(stream));
|
||||
gpu_time_used = get_time_us_sync(stream) - gpu_time_used;
|
||||
|
||||
ArgumentModel<e_M, e_N, e_lda, e_ldb, e_ldd>{}.log_args<T>(
|
||||
diff --git a/clients/include/testing_set_get_vector_async.hpp b/clients/include/testing_set_get_vector_async.hpp
|
||||
index b88cc0c..0bddc20 100644
|
||||
--- a/clients/include/testing_set_get_vector_async.hpp
|
||||
+++ b/clients/include/testing_set_get_vector_async.hpp
|
||||
@@ -113,7 +113,7 @@ void testing_set_get_vector_async(const Arguments& arg)
|
||||
|
||||
cpu_time_used = get_time_us_no_sync() - cpu_time_used;
|
||||
|
||||
- hipStreamSynchronize(stream);
|
||||
+ CHECK_HIP_ERROR(hipStreamSynchronize(stream));
|
||||
|
||||
if(arg.unit_check)
|
||||
{
|
||||
@@ -144,7 +144,7 @@ void testing_set_get_vector_async(const Arguments& arg)
|
||||
DAPI_DISPATCH(rocblas_get_vector_async_fn, (N, sizeof(T), db, ldd, hy, incy, stream));
|
||||
}
|
||||
|
||||
- hipStreamSynchronize(stream);
|
||||
+ CHECK_HIP_ERROR(hipStreamSynchronize(stream));
|
||||
gpu_time_used = get_time_us_sync(stream) - gpu_time_used;
|
||||
|
||||
ArgumentModel<e_N, e_incx, e_incy, e_ldd>{}.log_args<T>(rocblas_cout,
|
||||
diff --git a/library/src/include/handle.hpp b/library/src/include/handle.hpp
|
||||
index a0a1760..c80cc5e 100644
|
||||
--- a/library/src/include/handle.hpp
|
||||
+++ b/library/src/include/handle.hpp
|
||||
@@ -147,16 +147,20 @@ private:
|
||||
: device_id(device_id)
|
||||
, old_device_id(-1)
|
||||
{
|
||||
- hipGetDevice(&old_device_id);
|
||||
+ THROW_IF_HIP_ERROR(hipGetDevice(&old_device_id));
|
||||
if(device_id != old_device_id)
|
||||
- hipSetDevice(device_id);
|
||||
+ {
|
||||
+ THROW_IF_HIP_ERROR(hipSetDevice(device_id));
|
||||
+ }
|
||||
}
|
||||
|
||||
// Old device ID is restored on destruction
|
||||
~_rocblas_saved_device_id()
|
||||
{
|
||||
if(device_id != old_device_id)
|
||||
- hipSetDevice(old_device_id);
|
||||
+ {
|
||||
+ (void)(hipSetDevice(old_device_id));
|
||||
+ }
|
||||
}
|
||||
|
||||
// Move constructor
|
||||
diff --git a/library/src/src64/blas1/rocblas_dot_kernels_64.cpp b/library/src/src64/blas1/rocblas_dot_kernels_64.cpp
|
||||
index 0bd3061..2b38dd7 100644
|
||||
--- a/library/src/src64/blas1/rocblas_dot_kernels_64.cpp
|
||||
+++ b/library/src/src64/blas1/rocblas_dot_kernels_64.cpp
|
||||
@@ -325,7 +325,7 @@ rocblas_status rocblas_internal_dot_launcher_64(rocblas_handle __restrict__ hand
|
||||
if(handle->pointer_mode == rocblas_pointer_mode_host)
|
||||
{
|
||||
// sync here to match legacy BLAS
|
||||
- hipStreamSynchronize(handle->get_stream());
|
||||
+ RETURN_IF_HIP_ERROR(hipStreamSynchronize(handle->get_stream()));
|
||||
}
|
||||
|
||||
return rocblas_status_success;
|
||||
@@ -1,168 +0,0 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
# Tests consume too much time and space
|
||||
%bcond test 0
|
||||
|
||||
%global rocm_version 7.1.1
|
||||
|
||||
Name: rocblas
|
||||
Summary: BLAS implementation for ROCm
|
||||
Version: %{rocm_version}
|
||||
Release: %autorelease
|
||||
License: MIT AND BSD-3-Clause
|
||||
URL: https://github.com/ROCm/rocBLAS
|
||||
#!RemoteAsset
|
||||
Source0: %{url}/archive/rocm-%{rocm_version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
BuildOption(conf): -G Ninja
|
||||
BuildOption(conf): -DBLAS_INCLUDE_DIR=%{_includedir}/cblas
|
||||
BuildOption(conf): -DBLAS_LIBRARY=cblas
|
||||
BuildOption(conf): -DCMAKE_CXX_COMPILER=hipcc
|
||||
BuildOption(conf): -DCMAKE_C_COMPILER=clang
|
||||
BuildOption(conf): -DCMAKE_LINKER=%rocmllvm_bindir/ld.lld
|
||||
BuildOption(conf): -DCMAKE_AR=%rocmllvm_bindir/llvm-ar
|
||||
BuildOption(conf): -DCMAKE_RANLIB=%rocmllvm_bindir/llvm-ranlib
|
||||
BuildOption(conf): -DCMAKE_PREFIX_PATH=%{rocmllvm_cmakedir}/..
|
||||
BuildOption(conf): -DCMAKE_SKIP_RPATH=ON
|
||||
BuildOption(conf): -DCMAKE_VERBOSE_MAKEFILE=ON
|
||||
BuildOption(conf): -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF
|
||||
# Avoid using external tensile
|
||||
BuildOption(conf): -DBUILD_WITH_PIP=OFF
|
||||
BuildOption(conf): -DROCM_SYMLINK_LIBS=OFF
|
||||
BuildOption(conf): -DHIP_PLATFORM=amd
|
||||
# These will be enabled in the distant future
|
||||
BuildOption(conf): -DBUILD_CLIENTS_BENCHMARKS=%{?with_test:ON}%{!?with_test:OFF}
|
||||
BuildOption(conf): -DBUILD_CLIENTS_TESTS=%{?with_test:ON}%{!?with_test:OFF}
|
||||
BuildOption(conf): -DBUILD_CLIENTS_TESTS_OPENMP=OFF
|
||||
BuildOption(conf): -DBUILD_FORTRAN_CLIENTS=OFF
|
||||
BuildOption(conf): -DBUILD_OFFLOAD_COMPRESS=ON
|
||||
BuildOption(conf): -DBUILD_WITH_HIPBLASLT=OFF
|
||||
BuildOption(conf): -DBUILD_WITH_TENSILE=ON
|
||||
BuildOption(conf): -DGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
BuildOption(conf): -DTensile_LIBRARY_FORMAT=msgpack
|
||||
BuildOption(conf): -DTensile_VERBOSE=1
|
||||
BuildOption(conf): -DTensile_DIR=$(%{_bindir}/TensileGetPath)/cmake
|
||||
BuildOption(conf): -DTensile_LOGIC=asm_full
|
||||
BuildOption(conf): -DTensile_CODE_OBJECT_VERSION=default
|
||||
BuildOption(conf): -DTensile_SEPARATE_ARCHITECTURES=ON
|
||||
BuildOption(conf): -DTensile_LAZY_LIBRARY_LOADING=ON
|
||||
BuildOption(conf): -DTensile_ASSEMBLER=clang++
|
||||
|
||||
Patch0: 0001-fixup-install-of-tensile-output.patch
|
||||
# https://github.com/ROCm/rocm-libraries/commit/6221075881f3ea8e9dfa0d985f22005c74ae1f52
|
||||
Patch1: 0002-fix-nodiscard-return-value-ignored.patch
|
||||
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: cmake
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(msgpack)
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: ninja
|
||||
BuildRequires: pkgconfig(libzstd)
|
||||
BuildRequires: python3dist(tensile)
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-device-libs
|
||||
BuildRequires: rocm-llvm-macros
|
||||
|
||||
%if %{with test}
|
||||
BuildRequires: gcc-fortran
|
||||
BuildRequires: cmake(openmp)
|
||||
BuildRequires: cmake(rocm_smi)
|
||||
BuildRequires: pkgconfig(blas)
|
||||
BuildRequires: pkgconfig(GTest)
|
||||
BuildRequires: python3dist(pyyaml)
|
||||
BuildRequires: rocminfo
|
||||
%endif
|
||||
|
||||
Provides: rocblas = %{version}-%{release}
|
||||
Requires: python3dist(msgpack)
|
||||
|
||||
%description
|
||||
rocBLAS is the AMD library for Basic Linear Algebra Subprograms
|
||||
(BLAS) on the ROCm platform. It is implemented in the HIP
|
||||
programming language and optimized for AMD GPUs.
|
||||
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
Requires: rocm-hip-devel
|
||||
|
||||
%description devel
|
||||
%{summary}
|
||||
|
||||
%if %{with test}
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
Requires: diffutils
|
||||
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%prep -a
|
||||
sed -i -e 's@target_link_libraries( rocblas-test PRIVATE ${BLAS_LIBRARY} ${GTEST_BOTH_LIBRARIES} roc::rocblas )@target_link_libraries( rocblas-test PRIVATE cblas ${GTEST_BOTH_LIBRARIES} roc::rocblas )@' clients/gtest/CMakeLists.txt
|
||||
|
||||
# no git in this build
|
||||
sed -i -e 's@find_package(Git REQUIRED)@find_package(Git)@' library/CMakeLists.txt
|
||||
|
||||
# /usr/include/gtest/internal/gtest-port.h:279:2: error: C++ versions less than C++14 are not supported.
|
||||
# 279 | #error C++ versions less than C++14 are not supported.
|
||||
sed -i -e 's@CXX_STANDARD 11@CXX_STANDARD 17@' clients/samples/CMakeLists.txt
|
||||
sed -i "s@/opt/rocm@%{_prefix}@g" \
|
||||
clients/cmake/FindROCmSMI.cmake \
|
||||
clients/CMakeLists.txt \
|
||||
rmake.py \
|
||||
rmake.py \
|
||||
rmake.py \
|
||||
toolchain-linux.cmake \
|
||||
header_compilation_tests.sh \
|
||||
library/src/tensile_host.cpp \
|
||||
library/src/include/handle.hpp \
|
||||
scripts/utilities/check_for_pretuned_sizes_c/Makefile \
|
||||
scripts/performance/blas/getspecs.py \
|
||||
scripts/performance/blas/commandrunner.py \
|
||||
CMakeLists.txt \
|
||||
library/CMakeLists.txt
|
||||
sed -i "s@llvm/bin@bin@g" CMakeLists.txt library/CMakeLists.txt
|
||||
|
||||
%install -a
|
||||
rm -f %{buildroot}%{_prefix}/share/doc/rocblas/LICENSE.md
|
||||
|
||||
%check
|
||||
%if %{with test}
|
||||
export LD_LIBRARY_PATH=%{_vpath_builddir}/library/src:$LD_LIBRARY_PATH
|
||||
%{_vpath_builddir}/clients/staging/rocblas-test --gtest_brief=1
|
||||
%endif
|
||||
|
||||
%files
|
||||
%license LICENSE.md
|
||||
%{_libdir}/librocblas.so.5{,.*}
|
||||
%{_libdir}/rocblas/
|
||||
|
||||
%files devel
|
||||
%doc README.md
|
||||
%{_includedir}/rocblas/
|
||||
%{_libdir}/cmake/rocblas/
|
||||
%{_libdir}/librocblas.so
|
||||
|
||||
%if %{with test}
|
||||
%files test
|
||||
%{_bindir}/rocblas*
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
%{autochangelog}
|
||||
+71
-194
@@ -1,234 +1,111 @@
|
||||
%global upstreamname rocFFT
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
# Without a GPU, the test cases will fail with `what(): hipGetDeviceCount failed`
|
||||
# rocFFT needs a GPU to run tests, but we could still
|
||||
# keep the test cases for packagers who have a GPU, so make it optional.
|
||||
%bcond test 0
|
||||
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
%bcond compat 0
|
||||
%if %{with compat}
|
||||
%global pkg_libdir lib
|
||||
%global pkg_prefix %{_prefix}/lib64/rocm/rocm-%{rocm_release}
|
||||
%global pkg_suffix -%{rocm_release}
|
||||
%else
|
||||
%global pkg_libdir %{_lib}
|
||||
%global pkg_prefix %{_prefix}
|
||||
%global pkg_suffix %{nil}
|
||||
%endif
|
||||
%global rocfft_name rocfft%{pkg_suffix}
|
||||
# rocm stack builds with clang
|
||||
%global toolchain clang
|
||||
|
||||
%global toolchain rocm
|
||||
|
||||
# hipcc does not support some clang flags
|
||||
%global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host-fcf-protection/')
|
||||
|
||||
%bcond debug 0
|
||||
%if %{with debug}
|
||||
%global build_type DEBUG
|
||||
%else
|
||||
%global build_type RelWithDebInfo
|
||||
%endif
|
||||
|
||||
# kernel oops on gfx1201
|
||||
# https://github.com/ROCm/rocFFT/issues/560
|
||||
%bcond test 0
|
||||
%if %{with test}
|
||||
# Disable rpath checks for a local build
|
||||
%global __brp_check_rpaths %{nil}
|
||||
%global build_test ON
|
||||
%else
|
||||
%global build_test OFF
|
||||
%endif
|
||||
|
||||
# Option to test suite for testing on real HW:
|
||||
# May have to set gpu under test with
|
||||
# export HIP_VISIBLE_DEVICES=<num> - 0, 1 etc.
|
||||
%bcond check 0
|
||||
|
||||
# For docs
|
||||
%bcond doc 0
|
||||
|
||||
# Compression type and level for source/binary package payloads.
|
||||
# "w7T0.xzdio" xz level 7 using %%{getncpus} threads
|
||||
%global _source_payload w7T0.xzdio
|
||||
%global _binary_payload w7T0.xzdio
|
||||
|
||||
# Use rocm-llvm strip
|
||||
%global __strip %rocmllvm_bindir/llvm-strip
|
||||
|
||||
# Use ninja if it is available
|
||||
%bcond ninja 1
|
||||
|
||||
%if %{with ninja}
|
||||
%global cmake_generator -G Ninja
|
||||
%else
|
||||
%global cmake_generator %{nil}
|
||||
%endif
|
||||
|
||||
%global cmake_config \\\
|
||||
-DBUILD_CLIENTS_TESTS_OPENMP=OFF \\\
|
||||
-DBUILD_CLIENTS_TESTS=%{build_test} \\\
|
||||
-DCMAKE_AR=%rocmllvm_bindir/llvm-ar \\\
|
||||
-DCMAKE_BUILD_TYPE=%{build_type} \\\
|
||||
-DCMAKE_C_COMPILER=%rocmllvm_bindir/clang \\\
|
||||
-DCMAKE_CXX_COMPILER=%rocmllvm_bindir/clang++ \\\
|
||||
-DCMAKE_CXX_FLAGS="--rtlib=compiler-rt --unwindlib=libgcc -fPIC" \\\
|
||||
-DCMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES=/usr/include \\\
|
||||
-DCMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES=/usr/include \\\
|
||||
-DCMAKE_INSTALL_LIBDIR=%{pkg_libdir} \\\
|
||||
-DCMAKE_INSTALL_PREFIX=%{pkg_prefix} \\\
|
||||
-DCMAKE_LINKER=%rocmllvm_bindir/ld.lld \\\
|
||||
-DCMAKE_RANLIB=%rocmllvm_bindir/llvm-ranlib \\\
|
||||
-DHIPCC_BIN_DIR=%{_bindir} \\\
|
||||
-DHIP_COMPILER=%rocmllvm_bindir/clang++ \\\
|
||||
-DHIP_PLATFORM=amd \\\
|
||||
-DROCFFT_BUILD_OFFLINE_TUNER=OFF \\\
|
||||
-DROCFFT_KERNEL_CACHE_ENABLE=OFF \\\
|
||||
-DROCM_SYMLINK_LIBS=OFF \\\
|
||||
-DSQLITE_USE_SYSTEM_PACKAGE=ON \\\
|
||||
-Dhip_DIR=/usr/lib64/cmake/hip \\\
|
||||
-DCMAKE_PREFIX_PATH="%{rocmllvm_cmakedir}/..;%{_libdir}/cmake"
|
||||
# -DHIP_COMMON_DIR=$p/hip
|
||||
|
||||
%global gpu_list %{rocm_gpu_list_default}
|
||||
%global _gpu_list gfx1100
|
||||
|
||||
Name: rocfft%{pkg_suffix}
|
||||
Name: rocfft
|
||||
Version: %{rocm_version}
|
||||
Release: 1%{?dist}
|
||||
Summary: ROCm Fast Fourier Transforms (FFT) library
|
||||
Release: %autorelease
|
||||
Summary: ROCm Fast Fourier Transforms library
|
||||
License: MIT
|
||||
Url: https://github.com/ROCm/rocFFT
|
||||
#!RemoteAsset: sha256:047e4e93e0b12869bf42136b5eb683df3a1635b01a58bbb25c8861df291ab285
|
||||
Source: %{url}/archive/rocm-%{version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
Url: https://github.com/ROCm/%{upstreamname}
|
||||
Source0: %{url}/archive/rocm-%{version}.tar.gz#/%{upstreamname}-rocm-%{version}.tar.gz
|
||||
Patch0: 0001-cmake-use-gnu-installdirs.patch
|
||||
|
||||
BuildRequires: llvm
|
||||
BuildRequires: llvm-devel
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-devel
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: clang-tools-extra-devel
|
||||
BuildRequires: lld
|
||||
BuildRequires: lld-devel
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: rocm-device-libs
|
||||
BuildOption(conf): -G Ninja
|
||||
BuildOption(conf): -DAMDGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
BuildOption(conf): -DBUILD_CLIENTS_TESTS=ON
|
||||
BuildOption(conf): -DROCFFT_BUILD_OFFLINE_TUNER=OFF
|
||||
BuildOption(conf): -DROCFFT_KERNEL_CACHE_ENABLE=OFF
|
||||
BuildOption(conf): -DSQLITE_USE_SYSTEM_PACKAGE=ON
|
||||
|
||||
BuildRequires: python3
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: pkgconfig(sqlite3)
|
||||
BuildRequires: rocm-cmake%{pkg_suffix}
|
||||
BuildRequires: rocm-comgr%{pkg_suffix}-devel
|
||||
BuildRequires: rocm-llvm%{pkg_suffix}-macros
|
||||
BuildRequires: rocm-hip%{pkg_suffix}-devel
|
||||
#BuildRequires: rocm-rpm-macros%{pkg_suffix}
|
||||
BuildRequires: rocr-runtime%{pkg_suffix}-devel >= %{rocm_release}
|
||||
|
||||
%if %{with test}
|
||||
BuildRequires: rocrand%{pkg_suffix}-devel
|
||||
BuildRequires: fftw-devel
|
||||
BuildRequires: boost-devel
|
||||
BuildRequires: hiprand%{pkg_suffix}-devel
|
||||
|
||||
BuildRequires: gtest-devel
|
||||
|
||||
# rocfft-test compiles some things and requires rocm-hip-devel
|
||||
Requires: rocm-hip%{pkg_suffix}-devel >= %{rocm_release}
|
||||
|
||||
%endif
|
||||
|
||||
%if %{with doc}
|
||||
BuildRequires: python3dist(sphinx)
|
||||
%endif
|
||||
|
||||
%if %{with ninja}
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: cmake
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hiprand)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(GTest)
|
||||
BuildRequires: cmake(rocrand)
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: libomp-devel
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: ninja
|
||||
%define __builder ninja
|
||||
%endif
|
||||
|
||||
Provides: rocfft%{pkg_suffix} = %{version}-%{release}
|
||||
|
||||
ExclusiveArch: x86_64 riscv64
|
||||
|
||||
Patch0: 0001-cmake-use-gnu-installdirs.patch
|
||||
BuildRequires: pkgconfig(fftw3)
|
||||
BuildRequires: pkgconfig(sqlite3)
|
||||
BuildRequires: python3
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-device-libs
|
||||
BuildRequires: rocm-llvm-macros
|
||||
|
||||
%description
|
||||
A library for computing Fast Fourier Transforms (FFT), part of ROCm.
|
||||
rocFFT is a software library for computing fast Fourier transforms (FFTs) written
|
||||
in HIP. It is part of AMD's software ecosystem based on ROCm. In addition to
|
||||
AMD GPU hardware, rocFFT also works on CPU devices to facilitate testing.
|
||||
|
||||
%package devel
|
||||
%package devel
|
||||
Summary: The rocFFT development package
|
||||
Requires: %{rocfft_name}%{?_isa} = %{version}-%{release}
|
||||
Requires: rocm-hip%{pkg_suffix}-devel
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
Requires: cmake(hip)
|
||||
|
||||
%description devel
|
||||
%description devel
|
||||
The rocFFT development package.
|
||||
|
||||
%if %{with test}
|
||||
%package test
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%prep
|
||||
%autosetup -n %{upstreamname}-rocm-%{version} -p 1
|
||||
|
||||
%prep -a
|
||||
# Do not care so much about the sqlite version
|
||||
sed -i -e 's@SQLite3 3.50.2 @SQLite3 @' cmake/sqlite.cmake
|
||||
|
||||
%build
|
||||
# ensuring executables are PIE enabled
|
||||
export LDFLAGS="${LDFLAGS} -pie"
|
||||
|
||||
# OpenMP tests are disabled because upstream sets rpath in that case without
|
||||
# a way to skip
|
||||
#
|
||||
# RHEL 9 has an issue with missing symbol __truncsfhf2 in libgcc.
|
||||
# So switch from libgcc to rocm-llvm's libclang-rt.builtins with
|
||||
# the rtlib=compiler-rt. Leave unwind unchanged with unwindlib=libgcc
|
||||
%cmake %{cmake_generator} %{cmake_config} \
|
||||
-DGPU_TARGETS=%{gpu_list}
|
||||
|
||||
%cmake_build
|
||||
|
||||
%install
|
||||
%cmake_install
|
||||
|
||||
# we don't need the rocfft_rtc_helper binary, don't package it
|
||||
%install -a
|
||||
# we don't need the rocfft_rtc_helper binary and client-info file
|
||||
find %{buildroot} -type f -name "rocfft_rtc_helper" -print0 | xargs -0 -I {} /usr/bin/rm -rf "{}"
|
||||
rm -rf %{buildroot}/%{_prefix}/.info
|
||||
rm -f %{buildroot}%{_datadir}/doc/rocfft/LICENSE.md
|
||||
|
||||
# we don't need or want the client-info file installed by rocfft
|
||||
rm -rf %{buildroot}/%{pkg_prefix}/.info
|
||||
|
||||
rm -f %{buildroot}%{pkg_prefix}/share/doc/rocfft/LICENSE.md
|
||||
|
||||
|
||||
%check
|
||||
%if %{with test}
|
||||
%if %{with check}
|
||||
%check
|
||||
%{_vpath_builddir}/clients/staging/rocfft-test
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%files -n %{rocfft_name}
|
||||
%files
|
||||
%doc README.md
|
||||
%license LICENSE.md
|
||||
|
||||
%{pkg_prefix}/%{pkg_libdir}/librocfft.so.0{,.*}
|
||||
%{_libdir}/librocfft.so.0{,.*}
|
||||
|
||||
%files devel
|
||||
%{pkg_prefix}/include/rocfft/
|
||||
%{pkg_prefix}/%{pkg_libdir}/librocfft.so
|
||||
%{pkg_prefix}/%{pkg_libdir}/cmake/rocfft/
|
||||
%{_includedir}/rocfft/
|
||||
%{_libdir}/cmake/rocfft/
|
||||
%{_libdir}/librocfft.so
|
||||
|
||||
%if %{with test}
|
||||
%files test
|
||||
%{pkg_prefix}/bin/rocfft-test
|
||||
%{pkg_prefix}/bin/rtc_helper_crash
|
||||
%endif
|
||||
%{_bindir}/rocfft-test
|
||||
%{_bindir}/rtc_helper_crash
|
||||
|
||||
%changelog
|
||||
* Mon Jan 26 2026 Yifan Xu <xuyifan@iscas.ac.cn> - 7.1.1-1
|
||||
- Import from upstream
|
||||
%autochangelog
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: Sakura286 <chenxuan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%global rocm_version 7.1.1
|
||||
|
||||
Name: rocm-core
|
||||
Version: %{rocm_version}
|
||||
Release: %autorelease
|
||||
Summary: A utility to get the ROCm release version
|
||||
License: MIT
|
||||
URL: https://github.com/ROCm/rocm-core
|
||||
#!RemoteAsset: sha256:0171b82a4d028d57035d0d57a01a058f50f1a23959d230cdeab14972dcd94da8
|
||||
Source0: %{url}/archive/refs/tags/rocm-%{version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
BuildOption(conf): -DROCM_VERSION=%{rocm_version}
|
||||
|
||||
BuildRequires: cmake
|
||||
|
||||
Provides: rocm-core = %{version}-%{release}
|
||||
|
||||
%description
|
||||
%{summary}
|
||||
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
%{summary}
|
||||
|
||||
%install -a
|
||||
rm -rvf %{buildroot}/%{_exec_prefix}/.info
|
||||
rm -rvf %{buildroot}/%{_exec_prefix}/libexec/rocm-core
|
||||
rm -rvf %{buildroot}/%{_exec_prefix}/share/doc/*/LICENSE.md
|
||||
rm -rvf %{buildroot}/%{_libdir}/rocmmod
|
||||
|
||||
%files
|
||||
%doc README.md
|
||||
%license LICENSE.md
|
||||
%{_libdir}/librocm-core.so.*
|
||||
|
||||
%files devel
|
||||
%{_includedir}/rocm-core/*.h
|
||||
%{_libdir}/cmake/rocm-core/*.cmake
|
||||
%{_libdir}/librocm-core.so
|
||||
|
||||
%changelog
|
||||
%{?autochangelog}
|
||||
@@ -0,0 +1,15 @@
|
||||
diff --git a/amd/comgr/CMakeLists.txt b/amd/comgr/CMakeLists.txt
|
||||
index cfa170f94..e03049224 100644
|
||||
--- a/amd/comgr/CMakeLists.txt
|
||||
+++ b/amd/comgr/CMakeLists.txt
|
||||
@@ -169,6 +169,10 @@ if (ADDRESS_SANITIZER)
|
||||
"${CMAKE_SHARED_LINKER_FLAGS} ${ASAN_LINKER_FLAGS}")
|
||||
endif()
|
||||
|
||||
+
|
||||
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ASAN_COMPILER_FLAGS} -fsigned-char")
|
||||
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ASAN_COMPILER_FLAGS} -fsigned-char")
|
||||
+
|
||||
set(AMD_COMGR_PRIVATE_COMPILE_OPTIONS)
|
||||
set(AMD_COMGR_PRIVATE_COMPILE_DEFINITIONS ${LLVM_DEFINITIONS})
|
||||
set(AMD_COMGR_PUBLIC_LINKER_OPTIONS)
|
||||
@@ -0,0 +1,122 @@
|
||||
ROCm 7.1.1 builds with llvm-20, but only llvm-21 is provided by openRuyi.
|
||||
Some backport work needs to be done.
|
||||
|
||||
This patch includes:
|
||||
|
||||
* https://github.com/ROCm/llvm-project/commit/a7ad03285bf9ff361acb5e721386870be9354620
|
||||
* https://github.com/ROCm/llvm-project/commit/75074634076c0e3b8b2a18bbcf6ffef01094b069
|
||||
|
||||
and another diff:
|
||||
|
||||
old https://github.com/ROCm/llvm-project/commit/f2987311af76ab1d5e6f770861865b6952002b0a
|
||||
new https://github.com/ROCm/llvm-project/commit/e3dc0a41658572aecb595e55a79b9f4a85224187
|
||||
|
||||
---
|
||||
diff --git a/amd/comgr/src/comgr-cache-bundler-command.cpp b/amd/comgr/src/comgr-cache-bundler-command.cpp
|
||||
index 514262725..23e919dbd 100644
|
||||
--- a/amd/comgr/src/comgr-cache-bundler-command.cpp
|
||||
+++ b/amd/comgr/src/comgr-cache-bundler-command.cpp
|
||||
@@ -155,10 +155,8 @@ void UnbundleCommand::addOptionsIdentifier(HashAlgorithm &H) const {
|
||||
Error UnbundleCommand::addInputIdentifier(HashAlgorithm &H) const {
|
||||
StringRef InputFilename = Config.InputFileNames.front();
|
||||
|
||||
- constexpr size_t LargestHeaderSize = CompressedOffloadBundle::V3HeaderSize;
|
||||
-
|
||||
ErrorOr<std::unique_ptr<MemoryBuffer>> MaybeInputBuffer =
|
||||
- MemoryBuffer::getFileSlice(InputFilename, LargestHeaderSize, 0);
|
||||
+ MemoryBuffer::getFile(InputFilename);
|
||||
if (!MaybeInputBuffer) {
|
||||
std::error_code EC = MaybeInputBuffer.getError();
|
||||
return createStringError(EC, Twine("Failed to open ") + InputFilename +
|
||||
@@ -167,14 +165,17 @@ Error UnbundleCommand::addInputIdentifier(HashAlgorithm &H) const {
|
||||
|
||||
MemoryBuffer &InputBuffer = **MaybeInputBuffer;
|
||||
|
||||
- uint8_t Header[LargestHeaderSize];
|
||||
- memset(Header, 0, sizeof(Header));
|
||||
- memcpy(Header, InputBuffer.getBufferStart(),
|
||||
- std::min(LargestHeaderSize, InputBuffer.getBufferSize()));
|
||||
-
|
||||
- // only hash the input file, not the whole header. Colissions are unlikely
|
||||
- // since the header includes a hash (weak) of the contents
|
||||
- H.update(Header);
|
||||
+ using Header = CompressedOffloadBundle::CompressedBundleHeader;
|
||||
+ Expected<Header> MaybeHeader = Header::tryParse(InputBuffer.getBuffer());
|
||||
+ if (!MaybeHeader)
|
||||
+ return MaybeHeader.takeError();
|
||||
+
|
||||
+ // The hash represents the contents of the bundle. Extracting the same
|
||||
+ // contents should give the same result, regardless of the compression
|
||||
+ // algorithm or header version. Since the hash used by the offload bundler is
|
||||
+ // not a cryptographic hash, we also add the uncompressed file size.
|
||||
+ H.update(MaybeHeader->Hash);
|
||||
+ H.update(MaybeHeader->UncompressedFileSize);
|
||||
return Error::success();
|
||||
}
|
||||
|
||||
diff --git a/amd/comgr/src/comgr-compiler.cpp b/amd/comgr/src/comgr-compiler.cpp
|
||||
index 82102910a..96ca28b94 100644
|
||||
--- a/amd/comgr/src/comgr-compiler.cpp
|
||||
+++ b/amd/comgr/src/comgr-compiler.cpp
|
||||
@@ -70,6 +70,7 @@
|
||||
#include "llvm/MC/MCCodeEmitter.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCInstrInfo.h"
|
||||
+#include "llvm/MC/MCInstPrinter.h"
|
||||
#include "llvm/MC/MCObjectFileInfo.h"
|
||||
#include "llvm/MC/MCObjectWriter.h"
|
||||
#include "llvm/MC/MCParser/MCAsmParser.h"
|
||||
@@ -462,8 +463,9 @@ bool executeAssemblerImpl(AssemblerInvocation &Opts, DiagnosticsEngine &Diags,
|
||||
|
||||
// FIXME: There is a bit of code duplication with addPassesToEmitFile.
|
||||
if (Opts.OutputType == AssemblerInvocation::FT_Asm) {
|
||||
- MCInstPrinter *IP = TheTarget->createMCInstPrinter(
|
||||
- llvm::Triple(Opts.Triple), Opts.OutputAsmVariant, *MAI, *MCII, *MRI);
|
||||
+ std::unique_ptr<MCInstPrinter> InstructionPrinter(
|
||||
+ TheTarget->createMCInstPrinter(
|
||||
+ llvm::Triple(Opts.Triple), Opts.OutputAsmVariant, *MAI, *MCII, *MRI));
|
||||
std::unique_ptr<MCCodeEmitter> MCE;
|
||||
std::unique_ptr<MCAsmBackend> MAB;
|
||||
if (Opts.ShowEncoding) {
|
||||
@@ -472,7 +474,7 @@ bool executeAssemblerImpl(AssemblerInvocation &Opts, DiagnosticsEngine &Diags,
|
||||
MAB.reset(TheTarget->createMCAsmBackend(*STI, *MRI, Options));
|
||||
}
|
||||
auto FOut = std::make_unique<formatted_raw_ostream>(*Out);
|
||||
- Str.reset(TheTarget->createAsmStreamer(Ctx, std::move(FOut), IP,
|
||||
+ Str.reset(TheTarget->createAsmStreamer(Ctx, std::move(FOut), std::move(InstructionPrinter),
|
||||
std::move(MCE), std::move(MAB)));
|
||||
} else if (Opts.OutputType == AssemblerInvocation::FT_Null) {
|
||||
Str.reset(createNullStreamer(Ctx));
|
||||
@@ -653,9 +655,9 @@ void logArgv(raw_ostream &OS, StringRef ProgramName,
|
||||
amd_comgr_status_t executeCommand(const Command &Job, raw_ostream &LogS,
|
||||
DiagnosticOptions &DiagOpts,
|
||||
llvm::vfs::FileSystem &FS) {
|
||||
- TextDiagnosticPrinter DiagClient(LogS, &DiagOpts);
|
||||
+ TextDiagnosticPrinter DiagClient(LogS, DiagOpts);
|
||||
IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs);
|
||||
- DiagnosticsEngine Diags(DiagID, &DiagOpts, &DiagClient, false);
|
||||
+ DiagnosticsEngine Diags(DiagID, DiagOpts, &DiagClient, false);
|
||||
|
||||
auto Arguments = Job.getArguments();
|
||||
SmallVector<const char *, 128> Argv;
|
||||
@@ -750,7 +752,7 @@ AMDGPUCompiler::executeInProcessDriver(ArrayRef<const char *> Args) {
|
||||
// here is mostly copy-and-pasted from driver.cpp/cc1_main.cpp/various Clang
|
||||
// tests to try to approximate the same behavior as running the `clang`
|
||||
// executable.
|
||||
- IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts(new DiagnosticOptions);
|
||||
+ std::unique_ptr<DiagnosticOptions> DiagOpts(new DiagnosticOptions);
|
||||
unsigned MissingArgIndex, MissingArgCount;
|
||||
InputArgList ArgList = getDriverOptTable().ParseArgs(
|
||||
Args.slice(1), MissingArgIndex, MissingArgCount);
|
||||
@@ -759,9 +761,9 @@ AMDGPUCompiler::executeInProcessDriver(ArrayRef<const char *> Args) {
|
||||
// DiagnosticsEngine actually exists.
|
||||
(void)ParseDiagnosticArgs(*DiagOpts, ArgList);
|
||||
TextDiagnosticPrinter *DiagClient =
|
||||
- new TextDiagnosticPrinter(LogS, &*DiagOpts);
|
||||
+ new TextDiagnosticPrinter(LogS, *DiagOpts);
|
||||
IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs);
|
||||
- DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient);
|
||||
+ DiagnosticsEngine Diags(DiagID, *DiagOpts, DiagClient);
|
||||
|
||||
ProcessWarningOptions(Diags, *DiagOpts, *OverlayFS, /*ReportDiags=*/false);
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
##Fix issue with HIP, where compilation flags are incorrect, see issue:
|
||||
#https://github.com/RadeonOpenCompute/ROCm-CompilerSupport/issues/49
|
||||
#Remove redundant includes:
|
||||
sed -i '/Args.push_back("-isystem");/,+3d' amd/comgr/src/comgr-compiler.cpp
|
||||
#Source hard codes the libdir too:
|
||||
sed -i 's/lib\(\/clang\)/%{_lib}\1/' amd/comgr/src/comgr-compiler.cpp
|
||||
|
||||
# Unsupported options
|
||||
sed -i 's@Args.push_back("-mlink-builtin-bitcode-postopt");@//Args.push_back("-mlink-builtin-bitcode-postopt");@' amd/comgr/src/comgr-compiler.cpp
|
||||
|
||||
# Use system perl
|
||||
sed -i 's|\(/usr/bin/\)env perl|\1perl|' amd/hipcc/bin/hipvars.pm
|
||||
|
||||
# Default rocm path is _prefix
|
||||
# HIPCC fixes to find clang++
|
||||
|
||||
sed -i 's| or -e "$HIP_PATH/bin/clang"||' amd/hipcc/bin/hipvars.pm
|
||||
sed -i 's|lib/llvm/bin|%{_lib}/llvm%{llvm_maj_ver}/bin|' \
|
||||
amd/hipcc/bin/hipvars.pm amd/hipcc/src/hipBin_amd.h amd/hipcc/src/hipBin_base.h
|
||||
sed -i 's|-e "$HIP_ROCCLR_HOME/bin/clang" or ||' amd/hipcc/bin/hipvars.pm
|
||||
|
||||
# Fixup finding /opt/llvm
|
||||
sed -i -e 's@sys::path::append(LLVMPath, "llvm");@//sys::path::append(LLVMPath, "llvm");@' amd/comgr/src/comgr-env.cpp
|
||||
# Fixup finding /opt/rocm/hip
|
||||
sed -i -e 's@sys::path::append(HIPPath, "hip");@//sys::path::append(HIPPath, "hip");@' amd/comgr/src/comgr-env.cpp
|
||||
|
||||
# Default rocm path is _prefix
|
||||
sed -i -e 's@/opt/rocm@%{_prefix}@' amd/hipcc/src/hipBin_base.h
|
||||
|
||||
LLVM_BINDIR=`llvm-config --bindir`
|
||||
if [ ! -x ${LLVM_BINDIR}/clang++ ]; then
|
||||
echo "Something wrong with llvm-config"
|
||||
false
|
||||
fi
|
||||
echo "s@\$ROCM_PATH/lib/llvm/bin@${LLVM_BINDIR}@" > pm.sed
|
||||
echo "s@hipClangPath /= \"lib/llvm/bin\"@hipClangPath = \"${LLVM_BINDIR}\"@" > h.sed
|
||||
sed -i -f pm.sed amd/hipcc/bin/hipvars.pm
|
||||
sed -i -f h.sed amd/hipcc/src/hipBin_amd.h
|
||||
|
||||
# ROCm upstream uses /opt for rocm-runtime, but we use /usr
|
||||
# Don't include it again since /usr/include is already included:
|
||||
sed -i '/" -isystem " + hsaPath + "\/include"/d' amd/hipcc/src/hipBin_amd.h
|
||||
|
||||
sed -i 's/find_package(Clang REQUIRED CONFIG)/find_package(Clang REQUIRED)/' amd/comgr/CMakeLists.txt
|
||||
sed -i 's/find_package(LLD REQUIRED CONFIG)/find_package(LLD REQUIRED)/' amd/comgr/CMakeLists.txt
|
||||
sed -i 's@${CLANG_CMAKE_DIR}/../../../@/usr/lib/clang/%{llvm_maj_ver}/@' amd/comgr/cmake/opencl_pch.cmake
|
||||
|
||||
# CMP0053 OLD is only needed on Windows, and it is deprecated in newer versions of CMake.
|
||||
sed -i 's/cmake_policy(SET CMP0053 OLD)/cmake_policy(SET CMP0053 NEW)/' amd/device-libs/cmake/OCL.cmake
|
||||
|
||||
# Fix up the path to the device libs hipcc uses
|
||||
sed -i -e 's@amdgcnBitcode = roccmPath@amdgcnBitcode = "%{_prefix}/%{amd_device_libs_prefix}/"@' amd/hipcc/src/hipBin_amd.h
|
||||
|
||||
# Fix up the location AMD_DEVICE_LIBS_PREFIX
|
||||
sed -i 's|@AMD_DEVICE_LIBS_PREFIX_CODE@|set(AMD_DEVICE_LIBS_PREFIX "%{_prefix}/%{amd_device_libs_prefix}")|' amd/device-libs/AMDDeviceLibsConfig.cmake.in
|
||||
@@ -0,0 +1,298 @@
|
||||
# SPDX-FileCopyrightText: (C) 2025, 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2025, 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: misaka00251 <liuxin@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
# The package follows LLVM's major version, but API version is still important:
|
||||
%global comgr_maj_api_ver 3
|
||||
%global comgr_full_api_ver %{comgr_maj_api_ver}.0
|
||||
|
||||
# What LLVM is upstream using (use LLVM_VERSION_MAJOR from cmake/Modules/LLVMVersion.cmake):
|
||||
%global llvm_maj_ver 21
|
||||
# Sakura286: ROCm 7.1.1 uses LLVM 20, but only LLVM 21 is on openRuyi.
|
||||
# Backport is needed.
|
||||
%global rocm_llvm_maj_ver 20
|
||||
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
%global bundle_prefix %{_libdir}/llvm%{llvm_maj_ver}
|
||||
%global llvm_triple %{_target_platform}
|
||||
%global amd_device_libs_prefix lib/clang/%{llvm_maj_ver}
|
||||
|
||||
%global toolchain clang
|
||||
|
||||
%ifarch x86_64
|
||||
%global targets_to_build "X86;AMDGPU"
|
||||
%endif
|
||||
%ifarch riscv64
|
||||
%global targets_to_build "RISCV;AMDGPU"
|
||||
%endif
|
||||
|
||||
# These tests are not enabled on either Fedora or Debian:
|
||||
# https://salsa.debian.org/rocm-team/rocm-llvm/-/blob/debian/unstable/debian/rules
|
||||
# https://src.fedoraproject.org/rpms/rocm-compilersupport/blob/rawhide/f/rocm-compilersupport.spec
|
||||
# Disabled by default.
|
||||
%bcond device_libs_test 0
|
||||
%bcond comgr_test 0
|
||||
|
||||
Name: rocm-llvm
|
||||
Version: %{rocm_version}
|
||||
Release: %autorelease
|
||||
Summary: Various AMD ROCm LLVM related services
|
||||
# llvm is Apache-2.0 WITH LLVM-exception OR NCSA
|
||||
# hipcc is MIT, comgr and device-libs are NCSA:
|
||||
License: (Apache-2.0 WITH LLVM-exception OR NCSA) AND NCSA AND MIT
|
||||
URL: https://github.com/ROCm/llvm-project
|
||||
#!RemoteAsset: sha256:d76a16db4a56914383029e241823f7bc2a3d645f2967dd22230f11c11cfe189e
|
||||
Source0: %{url}/archive/refs/tags/rocm-%{rocm_version}.tar.gz
|
||||
Source1: rocm-llvm.prep.in
|
||||
|
||||
# RISC-V support patches
|
||||
# https://salsa.debian.org/rocm-team/rocm-llvm/-/merge_requests/2
|
||||
Patch0: 0002-Use-signed-char-in-comgr-building.patch
|
||||
# Backport mainline comgr patches since 7.1.1 is built on llvm-20
|
||||
Patch1: 0003-adapt-comgr-api-to-llvm-21.patch
|
||||
|
||||
BuildRequires: clang >= %{llvm_maj_ver}
|
||||
BuildRequires: clang-devel >= %{llvm_maj_ver}
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: cmake
|
||||
BuildRequires: fdupes
|
||||
BuildRequires: lld >= %{llvm_maj_ver}
|
||||
BuildRequires: lld-devel >= %{llvm_maj_ver}
|
||||
BuildRequires: llvm-devel >= %{llvm_maj_ver}
|
||||
BuildRequires: llvm-test >= %{llvm_maj_ver}
|
||||
BuildRequires: pkgconfig(libffi)
|
||||
BuildRequires: pkgconfig(libxml-2.0)
|
||||
BuildRequires: pkgconfig(libzstd)
|
||||
# comgr requires python
|
||||
BuildRequires: pkgconfig(python3)
|
||||
BuildRequires: pkgconfig(zlib)
|
||||
BuildRequires: rocm-cmake >= %{rocm_release}
|
||||
|
||||
%description
|
||||
%{summary}
|
||||
|
||||
%package macros
|
||||
Summary: ROCm Compiler RPM macros for RPM Build
|
||||
BuildArch: noarch
|
||||
|
||||
%description macros
|
||||
This package contains ROCm compiler related RPM macros.
|
||||
|
||||
%package -n rocm-device-libs
|
||||
Summary: AMD ROCm LLVM bit code libraries
|
||||
|
||||
%description -n rocm-device-libs
|
||||
This package contains a set of AMD specific device-side language runtime
|
||||
libraries in the form of bit code. Specifically:
|
||||
- Open Compute library controls
|
||||
- Open Compute Math library
|
||||
- Open Compute Kernel library
|
||||
- OpenCL built-in library
|
||||
- HIP built-in library
|
||||
- Heterogeneous Compute built-in library
|
||||
|
||||
%package -n rocm-comgr
|
||||
Summary: AMD ROCm LLVM Code Object Manager
|
||||
Provides: comgr(major) = %{comgr_maj_api_ver}
|
||||
Provides: rocm-comgr = %{comgr_full_api_ver}-%{release}
|
||||
|
||||
%description -n rocm-comgr
|
||||
The AMD Code Object Manager (Comgr) is a shared library which provides
|
||||
operations for creating and inspecting code objects.
|
||||
|
||||
%package -n rocm-comgr-devel
|
||||
Summary: AMD ROCm LLVM Code Object Manager
|
||||
Requires: rocm-comgr%{?_isa} = %{version}-%{release}
|
||||
Requires: rocm-device-libs
|
||||
|
||||
%description -n rocm-comgr-devel
|
||||
The AMD Code Object Manager (Comgr) development package.
|
||||
|
||||
%package -n hipcc
|
||||
Summary: HIP compiler driver
|
||||
Requires: rocm-device-libs = %{version}-%{release}
|
||||
Suggests: rocminfo
|
||||
|
||||
%description -n hipcc
|
||||
hipcc is a compiler driver utility that will call clang or nvcc, depending on
|
||||
target, and pass the appropriate include and library options for the target
|
||||
compiler and HIP infrastructure.
|
||||
|
||||
hipcc will pass-through options to the target compiler. The tools calling hipcc
|
||||
must ensure the compiler options are appropriate for the target compiler.
|
||||
|
||||
%prep
|
||||
%autosetup -p1 -n llvm-project-rocm-%{rocm_version}
|
||||
|
||||
# llvm_maj_ver sanity check (we should be matching the bundled llvm major ver):
|
||||
if ! grep -q "set(LLVM_VERSION_MAJOR %{llvm_maj_ver})" cmake/Modules/LLVMVersion.cmake; then
|
||||
echo "ERROR llvm_maj_ver macro is not correctly set"
|
||||
# Sakura286: ROCm 7.1.1 uses LLVM 20, but only 21 is on openRuyi. Sad.
|
||||
# TODO: Need to re-enable this 'if' when rocm upstream bump to llvm-21
|
||||
# exit 1
|
||||
fi
|
||||
|
||||
# Make sure we only build the AMD bits by discarding the bundled llvm code:
|
||||
ls | grep -xv "amd" | xargs rm -r
|
||||
|
||||
install -pm 755 %{SOURCE1} prep.sh
|
||||
sed -i -e 's@%%{_prefix}@%{_prefix}@' prep.sh
|
||||
sed -i -e 's@%%{_lib}@%{_lib}@' prep.sh
|
||||
sed -i -e 's@%%{amd_device_libs_prefix}@%{amd_device_libs_prefix}@' prep.sh
|
||||
sed -i -e 's@%%{bundle_prefix}@%{bundle_prefix}@' prep.sh
|
||||
sed -i -e 's@%%{llvm_maj_ver}@%{llvm_maj_ver}@' prep.sh
|
||||
grep -v '%%{' prep.sh
|
||||
|
||||
. ./prep.sh
|
||||
|
||||
%build
|
||||
CLANG_VERSION=%llvm_maj_ver
|
||||
|
||||
# Maybe use llvm-config-%{llvm_maj_ver} in the future
|
||||
LLVM_BINDIR=`%{_libdir}/llvm%{llvm_maj_ver}/bin/llvm-config --bindir`
|
||||
LLVM_CMAKEDIR=`%{_libdir}/llvm%{llvm_maj_ver}/bin/llvm-config --cmakedir`
|
||||
# Only enable a small set of GPU targets to accelerate the build
|
||||
GPU_TARGET="gfx1100;gfx1101;gfx1200;gfx1201"
|
||||
|
||||
echo "%%rocmllvm_version $CLANG_VERSION" > macros.rocmcompiler
|
||||
echo "%%rocmllvm_bindir $LLVM_BINDIR" >> macros.rocmcompiler
|
||||
echo "%%rocmllvm_cmakedir $LLVM_CMAKEDIR" >> macros.rocmcompiler
|
||||
echo "%%rocm_gpu_list_default \"$GPU_TARGET\"" >> macros.rocmcompiler
|
||||
|
||||
export PATH=%{_libdir}/llvm%{llvm_maj_ver}/bin:$PATH
|
||||
export INCLUDE_PATH=%{_libdir}/llvm%{llvm_maj_ver}/include
|
||||
|
||||
# Build device-libs first, hipcc and comgr need it
|
||||
%define _vpath_srcdir amd/device-libs
|
||||
%define _vpath_builddir build-devicelibs
|
||||
# Workaround for bug in cmake tests not finding amdgcn:
|
||||
ln -s %{amd_device_libs_prefix}/amdgcn amdgcn
|
||||
#TODO ROCM_DEVICE_LIBS_BITCODE_INSTALL_LOC_* should be removed in ROCm 7.0:
|
||||
%cmake -DROCM_DEVICE_LIBS_BITCODE_INSTALL_LOC_NEW="%{amd_device_libs_prefix}/amdgcn" \
|
||||
-DROCM_DEVICE_LIBS_BITCODE_INSTALL_LOC_OLD="" \
|
||||
-DCMAKE_EXE_LINKER_FLAGS:STRING="-fuse-ld=lld" \
|
||||
%{?__cmake_build_type:-DCMAKE_BUILD_TYPE="%{__cmake_build_type}"}
|
||||
%cmake_build -- %{?_smp_mflags}
|
||||
# Used by comgr to find device libs when building:
|
||||
export ROCM_PATH=$(realpath %__cmake_builddir)
|
||||
|
||||
# Build comgr
|
||||
%define _vpath_srcdir amd/comgr
|
||||
%define _vpath_builddir build-comgr
|
||||
%cmake -DCMAKE_PREFIX_PATH=$ROCM_PATH \
|
||||
-DCMAKE_MODULE_PATH=%{_libdir}/llvm%{llvm_maj_ver}/lib \
|
||||
-DCMAKE_BUILD_TYPE="RELEASE" \
|
||||
-DCMAKE_EXE_LINKER_FLAGS:STRING="-fuse-ld=lld" \
|
||||
-DBUILD_TESTING=%{?with_comgr_test:ON}%{!?with_comgr_test:OFF}
|
||||
%cmake_build -- %{?_smp_mflags}
|
||||
|
||||
# Build hipcc
|
||||
%define _vpath_srcdir amd/hipcc
|
||||
%define _vpath_builddir build-hipcc
|
||||
%cmake -DHIPCC_BACKWARD_COMPATIBILITY=OFF \
|
||||
-DCMAKE_EXE_LINKER_FLAGS:STRING="-fuse-ld=lld"
|
||||
%cmake_build -- %{?_smp_mflags}
|
||||
|
||||
%check
|
||||
# Test device-libs
|
||||
%define _vpath_srcdir amd/device-libs
|
||||
%define _vpath_builddir build-devicelibs
|
||||
# Workaround for bug in cmake tests not finding amdgcn:
|
||||
ln -s %{amd_device_libs_prefix}/amdgcn build-devicelibs/amdgcn
|
||||
# The following tests fail:
|
||||
# 6 - compile_native_rcp__gfx600 (Failed)
|
||||
# 7 - compile_native_rsqrt__gfx600 (Failed)
|
||||
# 10 - compile_native_rcp__gfx700 (Failed)
|
||||
# 11 - compile_native_rsqrt__gfx700 (Failed)
|
||||
# 14 - compile_native_rcp__gfx803 (Failed)
|
||||
# 15 - compile_native_rsqrt__gfx803 (Failed)
|
||||
# 18 - compile_atomic_work_item_fence__gfx803 (Failed)
|
||||
# 19 - compile_atomic_work_item_fence__gfx900 (Failed)
|
||||
# 20 - compile_atomic_work_item_fence__gfx90a (Failed)
|
||||
# 21 - compile_atomic_work_item_fence__gfx1030 (Failed)
|
||||
# 22 - compile_atomic_work_item_fence__gfx1100 (Failed)
|
||||
# 23 - compile_atomic_work_item_fence__gfx1200 (Failed)
|
||||
%{?with_device_libs_test:%ctest}
|
||||
|
||||
# Test comgr
|
||||
%define _vpath_srcdir amd/comgr
|
||||
%define _vpath_builddir build-comgr
|
||||
# The following tests fail:
|
||||
# 2 - comgr_disasm_llvm_reloc_test (SEGFAULT)
|
||||
# 3 - comgr_disasm_llvm_so_test (SEGFAULT)
|
||||
# 5 - comgr_disasm_options_test (SEGFAULT)
|
||||
# 13 - comgr_compile_test (Failed)
|
||||
# 14 - comgr_compile_minimal_test (Failed)
|
||||
# 16 - comgr_compile_log_remarks_test (Failed)
|
||||
# 17 - comgr_compile_source_with_device_libs_to_bc_with_vfs_test (Failed)
|
||||
# 21 - comgr_get_data_isa_name_test (Failed)
|
||||
# 29 - comgr_mangled_names_test (Failed)
|
||||
# 30 - comgr_multithread_test (SEGFAULT)
|
||||
# 32 - comgr_compile_hip_test (Failed)
|
||||
# 33 - comgr_compile_hip_to_relocatable (Failed)
|
||||
# 34 - comgr_mangled_names_hip_test (Failed)
|
||||
# 35 - comgr_unbundle_hip_test (Failed)
|
||||
%{?with_comgr_test:%ctest}
|
||||
|
||||
%install
|
||||
# Install macros
|
||||
install -Dpm 644 macros.rocmcompiler \
|
||||
%{buildroot}%{_rpmmacrodir}/macros.rocmcompiler
|
||||
|
||||
# Install device-libs
|
||||
%define _vpath_builddir build-devicelibs
|
||||
%cmake_install
|
||||
|
||||
# Install comgr
|
||||
%define _vpath_builddir build-comgr
|
||||
%cmake_install
|
||||
|
||||
# Install hipcc
|
||||
%define _vpath_builddir build-hipcc
|
||||
%cmake_install
|
||||
|
||||
rm -f %{buildroot}%{_datadir}/doc/ROCm-Device-Libs/LICENSE.TXT
|
||||
rm -rf %{buildroot}%{_datadir}/doc/amd_comgr
|
||||
rm -f %{buildroot}%{_datadir}/doc/hipcc/LICENSE.txt
|
||||
rm -f %{buildroot}%{_datadir}/doc/hipcc/README.md
|
||||
|
||||
%files macros
|
||||
%{_rpmmacrodir}/macros.rocmcompiler
|
||||
|
||||
%files -n rocm-device-libs
|
||||
%doc amd/device-libs/README.md amd/device-libs/doc/*.md
|
||||
%license amd/device-libs/LICENSE.TXT
|
||||
%dir %{_libdir}/cmake/AMDDeviceLibs
|
||||
%{_libdir}/cmake/AMDDeviceLibs/*.cmake
|
||||
%{_prefix}/%{amd_device_libs_prefix}/amdgcn
|
||||
|
||||
%files -n rocm-comgr
|
||||
%doc amd/comgr/README.md
|
||||
%license amd/comgr/LICENSE.txt
|
||||
%license amd/comgr/NOTICES.txt
|
||||
%{_libdir}/libamd_comgr.so.*
|
||||
|
||||
%files -n rocm-comgr-devel
|
||||
%dir %{_includedir}/amd_comgr
|
||||
%dir %{_libdir}/cmake/amd_comgr
|
||||
%{_includedir}/amd_comgr/amd_comgr.h
|
||||
%{_libdir}/libamd_comgr.so
|
||||
%{_libdir}/cmake/amd_comgr/*.cmake
|
||||
|
||||
%files -n hipcc
|
||||
%doc amd/hipcc/README.md
|
||||
%license amd/hipcc/LICENSE.txt
|
||||
%license amd/hipcc/README.md
|
||||
%{_bindir}/hipcc
|
||||
%{_bindir}/hipconfig
|
||||
%{_bindir}/hipvars.pm
|
||||
|
||||
%changelog
|
||||
%{?autochangelog}
|
||||
@@ -4,13 +4,13 @@ Date: Mon, 3 Nov 2025 06:33:31 -0800
|
||||
Subject: [PATCH] rocm-origami remove scope for variables
|
||||
|
||||
---
|
||||
shared/origami/cmake/origami-config.cmake.in | 11 -----------
|
||||
1 file changed, 11 deletions(-)
|
||||
cmake/origami-config.cmake.in | 12 ------------
|
||||
1 file changed, 12 deletions(-)
|
||||
|
||||
diff --git a/shared/origami/cmake/origami-config.cmake.in b/shared/origami/cmake/origami-config.cmake.in
|
||||
diff --git a/cmake/origami-config.cmake.in b/cmake/origami-config.cmake.in
|
||||
index d6c8000d0261..19370e7dd5bd 100644
|
||||
--- a/shared/origami/cmake/origami-config.cmake.in
|
||||
+++ b/shared/origami/cmake/origami-config.cmake.in
|
||||
--- a/cmake/origami-config.cmake.in
|
||||
+++ b/cmake/origami-config.cmake.in
|
||||
@@ -6,15 +6,4 @@ find_dependency(hip REQUIRED)
|
||||
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/origami-targets.cmake")
|
||||
|
||||
@@ -1,118 +1,81 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
%global upstreamname origami
|
||||
|
||||
%bcond compat 0
|
||||
%if %{with compat}
|
||||
%global pkg_libdir lib
|
||||
%global pkg_prefix %{_prefix}/lib64/rocm/rocm-%{rocm_release}
|
||||
%global pkg_suffix -%{rocm_release}
|
||||
%global pkg_module rocm%{pkg_suffix}
|
||||
%else
|
||||
%global pkg_libdir %{_lib}
|
||||
%global pkg_prefix %{_prefix}
|
||||
%global pkg_suffix %{nil}
|
||||
%global pkg_module default
|
||||
%endif
|
||||
%global origami_name rocm-origami%{pkg_suffix}
|
||||
Name: rocm-origami
|
||||
Version: %{rocm_version}
|
||||
Release: %autorelease
|
||||
Summary: Analytical GEMM Solution Selection
|
||||
License: MIT
|
||||
Url: https://github.com/ROCm/rocm-libraries
|
||||
#!RemoteAsset: sha256:1fb56e620a06e198aeec2cf37c11e6879d0c67c62e295b48779b7f486e34acb4
|
||||
Source0: %{url}/releases/download/rocm-%{version}/origami.tar.gz
|
||||
# License file is not included in the release tarball
|
||||
#!RemoteAsset: sha256:b185aaa652b0bf066c37a0d6314ce4bf4521e4a3c9bf46edd2f6a777ac522223
|
||||
Source1: https://raw.githubusercontent.com/ROCm/rocm-libraries/develop/shared/origami/LICENSE.md
|
||||
BuildSystem: cmake
|
||||
|
||||
Name: rocm-origami%{pkg_suffix}
|
||||
Version: %{rocm_version}
|
||||
Release: 1%{?dist}
|
||||
Summary: Analytical GEMM Solution Selection
|
||||
BuildOption(conf): -G Ninja
|
||||
BuildOption(conf): -DCMAKE_VERBOSE_MAKEFILE=ON
|
||||
|
||||
License: MIT
|
||||
URL: https://github.com/ROCm/rocm-libraries
|
||||
Source0: %{url}/releases/download/rocm-%{version}/%{upstreamname}.tar.gz#/%{upstreamname}-%{version}.tar.gz
|
||||
# License file is not in the 7.1.0 tag, but is here
|
||||
Source2: https://github.com/ROCm/rocm-libraries/tree/develop/shared/origami/LICENSE.md
|
||||
|
||||
#
|
||||
# Workaround this hipblaslt build issue
|
||||
# CMake Error at /usr/lib64/cmake/origami/origami-config.cmake:11 (message):
|
||||
# Workaround hipblaslt build issue:
|
||||
# origami::origami target is missing
|
||||
#
|
||||
# hipblaslt from rocm-libraries does not use cmake to find origami
|
||||
# https://github.com/ROCm/rocm-libraries/issues/2422
|
||||
# So they would not have run into this issue.
|
||||
Patch1: 0001-rocm-origami-remove-scope-for-variables.patch
|
||||
Patch0: 0001-rocm-origami-remove-scope-for-variables.patch
|
||||
|
||||
ExclusiveArch: x86_64 riscv64
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: rocm-cmake%{pkg_suffix}
|
||||
BuildRequires: rocm-comgr%{pkg_suffix}-devel
|
||||
BuildRequires: rocm-llvm%{pkg_suffix}-macros
|
||||
BuildRequires: rocm-hip%{pkg_suffix}-devel
|
||||
BuildRequires: rocr-runtime%{pkg_suffix}-devel
|
||||
|
||||
BuildRequires: llvm
|
||||
BuildRequires: llvm-devel
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-devel
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: clang-tools-extra-devel
|
||||
BuildRequires: cmake
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: lld
|
||||
BuildRequires: lld-devel
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: rocm-device-libs
|
||||
BuildRequires: git
|
||||
BuildRequires: llvm
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-llvm-macros
|
||||
BuildRequires: ninja
|
||||
|
||||
%description
|
||||
The name "origami" still evokes the elegance of transforming
|
||||
a flat (2-D) sheet into intricate higher dimensional
|
||||
structures. In this context, however, Origami has evolved
|
||||
into a tool set for **GEMM solution selection and
|
||||
optimization**. Inspired by the art of paper folding, the
|
||||
library now enables users to explore a range of tiling and
|
||||
mapping configurations and to make informed decisions on
|
||||
data and computation mapping for high-performance GEMM
|
||||
operations.
|
||||
into a tool set for GEMM solution selection and optimization.
|
||||
Inspired by the art of paper folding, the library now enables
|
||||
users to explore a range of tiling and mapping configurations
|
||||
and to make informed decisions on data and computation mapping
|
||||
for high-performance GEMM operations.
|
||||
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
Requires: %{origami_name}%{?_isa} = %{version}-%{release}
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
%description devel
|
||||
%{summary}
|
||||
|
||||
%prep
|
||||
%autosetup -p3 -n %{upstreamname}
|
||||
|
||||
# The license file
|
||||
cp %{SOURCE2} .
|
||||
|
||||
%prep -a
|
||||
# License file is not in the tarball
|
||||
cp %{SOURCE1} .
|
||||
# Use system rocm-cmake, no downloading
|
||||
sed -i -e 's@if(NOT ROCM_FOUND)@if(FALSE)@' cmake/dependencies.cmake
|
||||
# We are building from a tarball, not a git repo
|
||||
sed -i -e 's@find_package(Git REQUIRED)@#find_package(Git REQUIRED)@' cmake/dependencies.cmake
|
||||
|
||||
%build
|
||||
%cmake \
|
||||
-DCMAKE_C_COMPILER=%rocmllvm_bindir/clang \
|
||||
-DCMAKE_CXX_COMPILER=%rocmllvm_bindir/clang++ \
|
||||
-DCMAKE_INSTALL_LIBDIR=%{pkg_libdir} \
|
||||
-DCMAKE_INSTALL_PREFIX=%{pkg_prefix}
|
||||
%install -a
|
||||
rm -f %{buildroot}%{_datadir}/doc/origami/LICENSE.md
|
||||
|
||||
%cmake_build
|
||||
|
||||
%install
|
||||
%cmake_install
|
||||
|
||||
# Extra license
|
||||
rm -f %{buildroot}%{pkg_prefix}/share/doc/origami/LICENSE.md
|
||||
|
||||
%files -n %{origami_name}
|
||||
%files
|
||||
%doc README.md
|
||||
%license LICENSE.md
|
||||
%{pkg_prefix}/%{pkg_libdir}/liborigami.so.0{,.*}
|
||||
%{_libdir}/liborigami.so.0{,.*}
|
||||
|
||||
%files devel
|
||||
%{pkg_prefix}/include/origami/
|
||||
%{pkg_prefix}/%{pkg_libdir}/cmake/origami/
|
||||
%{pkg_prefix}/%{pkg_libdir}/liborigami.so
|
||||
%{_includedir}/origami/
|
||||
%{_libdir}/cmake/origami/
|
||||
%{_libdir}/liborigami.so
|
||||
|
||||
%changelog
|
||||
* Mon Feb 23 2026 Yifan Xu <xuyifan@iscas.ac.cn> - 7.1.1-1
|
||||
- Import from upstream
|
||||
%autochangelog
|
||||
|
||||
@@ -1,69 +0,0 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%global rocm_version 7.1.1
|
||||
|
||||
Name: rocprim
|
||||
Version: %{rocm_version}
|
||||
Release: %autorelease
|
||||
Summary: ROCm parallel primitives
|
||||
License: MIT AND BSD-3-Clause
|
||||
URL: https://github.com/ROCm/rocPRIM
|
||||
#!RemoteAsset
|
||||
Source0: %{url}/archive/rocm-%{rocm_version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
BuildOption(conf): -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF
|
||||
BuildOption(conf): -DBUILD_TEST=OFF
|
||||
BuildOption(conf): -DCMAKE_AR=%{rocmllvm_bindir}/llvm-ar
|
||||
BuildOption(conf): -DCMAKE_C_COMPILER=%{rocmllvm_bindir}/clang
|
||||
BuildOption(conf): -DCMAKE_CXX_COMPILER=%{rocmllvm_bindir}/clang++
|
||||
BuildOption(conf): -DCMAKE_LINKER=%{rocmllvm_bindir}/ld.lld
|
||||
BuildOption(conf): -DCMAKE_PREFIX_PATH=%{rocmllvm_cmakedir}/..
|
||||
BuildOption(conf): -DCMAKE_RANLIB=%{rocmllvm_bindir}/llvm-ranlib
|
||||
BuildOption(conf): -DGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
BuildOption(conf): -DROCM_SYMLINK_LIBS=OFF
|
||||
|
||||
BuildRequires: clang-tools-extra-devel
|
||||
BuildRequires: cmake
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(Clang)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(LLD)
|
||||
BuildRequires: cmake(LLVM)
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: python3
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-device-libs
|
||||
BuildRequires: rocm-llvm-macros
|
||||
BuildRequires: rocminfo
|
||||
|
||||
%description
|
||||
The rocPRIM is a header-only library providing HIP parallel primitives
|
||||
for developing performant GPU-accelerated code on AMD ROCm platform.
|
||||
|
||||
%package devel
|
||||
Summary: ROCm parallel primitives
|
||||
BuildArch: noarch
|
||||
|
||||
%description devel
|
||||
The rocPRIM is a header-only library providing HIP parallel primitives
|
||||
for developing performant GPU-accelerated code on AMD ROCm platform.
|
||||
|
||||
%install -a
|
||||
rm -f %{buildroot}%{_prefix}/share/doc/rocprim/LICENSE.md
|
||||
|
||||
%files devel
|
||||
%doc README.md
|
||||
%license LICENSE.md
|
||||
%license NOTICES.txt
|
||||
%{_includedir}/%{name}
|
||||
%{_libdir}/cmake/rocprim
|
||||
|
||||
%changelog
|
||||
%{?autochangelog}
|
||||
@@ -1,42 +0,0 @@
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 2fdf5c4..0aac139 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -78,7 +78,8 @@ set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ${PROJECT_SOURCE_DIR}/cmake/Mo
|
||||
include(GNUInstallDirs) # install directories
|
||||
|
||||
# ROCm does not use lib64
|
||||
-set(CMAKE_INSTALL_LIBDIR "lib")
|
||||
+# But distributions use
|
||||
+# set(CMAKE_INSTALL_LIBDIR "lib")
|
||||
|
||||
include(rocprofiler_register_utilities) # various functions/macros
|
||||
include(rocprofiler_register_interfaces) # interface libraries
|
||||
@@ -113,6 +114,7 @@ if(ROCPROFILER_REGISTER_BUILD_SAMPLES)
|
||||
add_subdirectory(samples)
|
||||
endif()
|
||||
|
||||
-include(rocprofiler_register_config_packaging)
|
||||
+# packaging don't need cpack
|
||||
+# include(rocprofiler_register_config_packaging)
|
||||
|
||||
rocprofiler_register_print_features()
|
||||
diff --git a/source/lib/rocprofiler-register/CMakeLists.txt b/source/lib/rocprofiler-register/CMakeLists.txt
|
||||
index e15fa88..6d9591e 100644
|
||||
--- a/source/lib/rocprofiler-register/CMakeLists.txt
|
||||
+++ b/source/lib/rocprofiler-register/CMakeLists.txt
|
||||
@@ -20,10 +20,13 @@ target_include_directories(
|
||||
target_link_libraries(
|
||||
rocprofiler-register
|
||||
PUBLIC rocprofiler-register::headers
|
||||
- PRIVATE fmt::fmt glog::glog rocprofiler-register::build-flags
|
||||
+ PRIVATE fmt glog rocprofiler-register::build-flags
|
||||
rocprofiler-register::memcheck rocprofiler-register::stdcxxfs
|
||||
rocprofiler-register::dl)
|
||||
|
||||
+target_compile_definitions(rocprofiler-register
|
||||
+ PRIVATE GLOG_USE_GLOG_EXPORT)
|
||||
+
|
||||
set_target_properties(
|
||||
rocprofiler-register
|
||||
PROPERTIES OUTPUT_NAME rocprofiler-register
|
||||
@@ -1,78 +0,0 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%global rocm_version 7.1.0
|
||||
|
||||
Name: rocprofiler-register
|
||||
Version: %{rocm_version}
|
||||
Release: %autorelease
|
||||
Summary: A rocprofiler helper library
|
||||
License: MIT AND BSD-3-Clause
|
||||
Url: https://github.com/ROCm/rocprofiler-register
|
||||
#!RemoteAsset
|
||||
Source0: %{url}/archive/rocm-%{rocm_version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
# Use system glog
|
||||
Patch0: 0001-use-system-buildreq.patch
|
||||
|
||||
BuildOption(conf): -DROCPROFILER_REGISTER_BUILD_FMT=OFF
|
||||
BuildOption(conf): -DROCPROFILER_REGISTER_BUILD_GLOG=OFF
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: cmake(glog)
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: pkgconfig(fmt)
|
||||
BuildRequires: pkgconfig(gflags)
|
||||
|
||||
%description
|
||||
The rocprofiler-register library is a helper library that coordinates
|
||||
the modification of the intercept API table(s) of the HSA/HIP/ROCTx
|
||||
runtime libraries by the ROCprofiler (v2) library. The purpose of this
|
||||
library is to provide a consistent and automated mechanism of enabling
|
||||
performance analysis in the ROCm runtimes which does not rely on
|
||||
environment variables or unique methods for each runtime library.
|
||||
|
||||
When a runtime is initialized (either explicitly or lazily) and the
|
||||
intercept API table is constructed, it passes this API table to
|
||||
rocprofiler-register. Rocprofiler-register scans the symbols in the
|
||||
address space and if it detects there is at least one visible symbol
|
||||
named rocprofiler_configure (which is a function provided by tools),
|
||||
it passes the intercept API table to the rocprofiler library (dlopening
|
||||
the rocprofiler library if it is not already loaded). The rocprofiler
|
||||
library then does an extensive scan for all the instances of the
|
||||
rocprofiler_configure symbols and invokes each of them. The
|
||||
rocprofiler_configure function (again, provided by a tool)
|
||||
effectively tells rocprofiler which behaviors it wants to be notified
|
||||
about, features it wants to use (e.g. API tracing, kernel dispatch
|
||||
timing), etc.
|
||||
|
||||
%package devel
|
||||
Summary: The development package for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
%{summary}
|
||||
|
||||
%install -a
|
||||
# Do not install the test source etc
|
||||
rm -rf %{buildroot}%{_prefix}/share/rocprofiler-register
|
||||
rm -rf %{buildroot}%{_prefix}/share/modulefiles
|
||||
rm -rf %{buildroot}%{_prefix}/share/doc/rocprofiler-register/LICENSE.md
|
||||
|
||||
%files
|
||||
%license LICENSE.md
|
||||
%{_libdir}/librocprofiler-register.so.0{,.*}
|
||||
|
||||
%files devel
|
||||
%doc README.md
|
||||
%{_includedir}/rocprofiler-register/
|
||||
%{_libdir}/librocprofiler-register.so
|
||||
%{_libdir}/cmake/rocprofiler-register/
|
||||
|
||||
%changelog
|
||||
%{?autochangelog}
|
||||
+50
-161
@@ -1,131 +1,55 @@
|
||||
%global upstreamname rocRAND
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: Sakura286 <chenxuan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%bcond compat 0
|
||||
%if %{with compat}
|
||||
%global pkg_libdir lib
|
||||
%global pkg_prefix %{_prefix}/lib64/rocm/rocm-%{rocm_release}/
|
||||
%global pkg_suffix -%{rocm_release}
|
||||
%else
|
||||
%global pkg_libdir %{_lib}
|
||||
%global pkg_prefix %{_prefix}
|
||||
%global pkg_suffix %{nil}
|
||||
%endif
|
||||
%global rocrand_name rocrand%{pkg_suffix}
|
||||
|
||||
%global toolchain rocm
|
||||
# hipcc does not support some clang flags
|
||||
%global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host -fcf-protection/' -e 's/-mtls-dialect=gnu2//' )
|
||||
|
||||
%bcond debug 0
|
||||
%if %{with debug}
|
||||
%global build_type DEBUG
|
||||
%else
|
||||
%global build_type RelWithDebInfo
|
||||
%endif
|
||||
|
||||
# relevant HW is required to run %check
|
||||
# rocRAND need a GPU to run tests, but we could still
|
||||
# keep the test cases for packagers who have a GPU, so make it optional.
|
||||
%bcond test 0
|
||||
# enable building of tests if test is enabled
|
||||
%if %{with test}
|
||||
%global build_test ON
|
||||
%else
|
||||
%global build_test OFF
|
||||
%endif
|
||||
|
||||
# For docs
|
||||
%bcond doc 0
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
# Use ninja if it is available
|
||||
%bcond ninja 1
|
||||
# rocm builds with clang
|
||||
%global toolchain clang
|
||||
|
||||
%if %{with ninja}
|
||||
%global cmake_generator -G Ninja
|
||||
%else
|
||||
%global cmake_generator %{nil}
|
||||
%endif
|
||||
|
||||
# The common parts of the cmake configuration
|
||||
%global cmake_config \\\
|
||||
-DBUILD_TEST=%build_test \\\
|
||||
-DCMAKE_AR=%rocmllvm_bindir/llvm-ar \\\
|
||||
-DCMAKE_BUILD_TYPE=%build_type \\\
|
||||
-DCMAKE_EXPORT_COMPILE_COMMANDS=%{build_compile_db} \\\
|
||||
-DCMAKE_CXX_COMPILER=%rocmllvm_bindir/clang++ \\\
|
||||
-DCMAKE_C_COMPILER=%rocmllvm_bindir/clang \\\
|
||||
-DCMAKE_INSTALL_LIBDIR=%{pkg_libdir} \\\
|
||||
-DCMAKE_INSTALL_PREFIX=%{pkg_prefix} \\\
|
||||
-DCMAKE_PREFIX_PATH=%{rocmllvm_cmakedir}/.. \\\
|
||||
-DCMAKE_LINKER=%rocmllvm_bindir/ld.lld \\\
|
||||
-DCMAKE_RANLIB=%rocmllvm_bindir/llvm-ranlib \\\
|
||||
-DCMAKE_SKIP_RPATH=ON \\\
|
||||
-DROCM_SYMLINK_LIBS=OFF
|
||||
|
||||
# Compression type and level for source/binary package payloads.
|
||||
# "w7T0.xzdio" xz level 7 using %%{getncpus} threads
|
||||
%global _source_payload w7T0.xzdio
|
||||
%global _binary_payload w7T0.xzdio
|
||||
|
||||
%global gpu_list %{rocm_gpu_list_default}
|
||||
%global _gpu_list gfx1100
|
||||
|
||||
# export an llvm compilation database
|
||||
# Useful for input for other llvm tools
|
||||
%bcond export 0
|
||||
%if %{with export}
|
||||
%global build_compile_db ON
|
||||
%else
|
||||
%global build_compile_db OFF
|
||||
%endif
|
||||
|
||||
Name: rocrand%{pkg_suffix}
|
||||
Name: rocrand
|
||||
Version: %{rocm_version}
|
||||
Release: 7%{?dist}
|
||||
Release: %autorelease
|
||||
Summary: ROCm random number generator
|
||||
|
||||
Url: https://github.com/ROCm/rocRAND
|
||||
License: MIT AND BSD-3-Clause
|
||||
Source0: %{url}/archive/rocm-%{version}.tar.gz#/%{upstreamname}-%{version}.tar.gz
|
||||
Url: https://github.com/ROCm/rocRAND
|
||||
#!RemoteAsset: sha256:15c33c595aa8e4de1d8b3736df9eaf2ceba7914ffebe718f0997b0da28215d9e
|
||||
Source: %{url}/archive/rocm-%{version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
BuildOption(conf): -G Ninja
|
||||
BuildOption(conf): -DAMDGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
BuildOption(conf): -DBUILD_TEST=%{build_test}
|
||||
|
||||
BuildRequires: llvm
|
||||
BuildRequires: llvm-devel
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-devel
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: clang-tools-extra-devel
|
||||
BuildRequires: lld
|
||||
BuildRequires: lld-devel
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: rocm-device-libs
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: rocm-cmake%{pkg_suffix}
|
||||
BuildRequires: rocm-comgr%{pkg_suffix}-devel
|
||||
BuildRequires: rocm-llvm%{pkg_suffix}-macros
|
||||
BuildRequires: rocm-hip%{pkg_suffix}-devel
|
||||
BuildRequires: rocr-runtime%{pkg_suffix}-devel
|
||||
#BuildRequires: rocm-rpm-macros%{pkg_suffix}
|
||||
#BuildRequires: rocm-rpm-macros%{pkg_suffix}-modules
|
||||
|
||||
%if %{with test} || %{with check}
|
||||
BuildRequires: gtest-devel
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
%if %{with test}
|
||||
BuildRequires: cmake(GTest)
|
||||
%endif
|
||||
|
||||
%if %{with doc}
|
||||
BuildRequires: doxygen
|
||||
%endif
|
||||
|
||||
%if %{with ninja}
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: ninja
|
||||
%endif
|
||||
|
||||
Provides: rocrand%{pkg_suffix} = %{version}-%{release}
|
||||
|
||||
ExclusiveArch: x86_64 riscv64
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-device-libs
|
||||
BuildRequires: rocm-llvm-macros
|
||||
|
||||
%description
|
||||
The rocRAND project provides functions that generate pseudo-random and
|
||||
@@ -135,75 +59,40 @@ The rocRAND library is implemented in the HIP programming language and
|
||||
optimized for AMD's latest discrete GPUs. It is designed to run on top of AMD's
|
||||
Radeon Open Compute ROCm runtime, but it also works on CUDA enabled GPUs.
|
||||
|
||||
%package devel
|
||||
%package devel
|
||||
Summary: The rocRAND development package
|
||||
Requires: %{rocrand_name}%{?_isa} = %{version}-%{release}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
%description devel
|
||||
The rocRAND development package.
|
||||
|
||||
%if %{with test}
|
||||
%package test
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{rocrand_name}%{?_isa} = %{version}-%{release}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%prep
|
||||
%if %{with gitcommit}
|
||||
%setup -q -n rocm-libraries-%{commit0}
|
||||
cd projects/rocrand
|
||||
%else
|
||||
%autosetup -p1 -n %{upstreamname}-rocm-%{version}
|
||||
%endif
|
||||
%install -a
|
||||
rm -f %{buildroot}%{_datadir}/doc/rocrand/LICENSE.md
|
||||
|
||||
|
||||
# On Tumbleweed Q3,2025
|
||||
# https://github.com/ROCm/rocm-libraries/issues/83
|
||||
# /usr/include/gtest/internal/gtest-port.h:273:2: error: C++ versions less than C++17 are not supported.
|
||||
# Convert the c++11's to c++17
|
||||
#sed -i -e 's@set(CMAKE_CXX_STANDARD 11)@set(CMAKE_CXX_STANDARD 17)@' {,test/{cpp_wrapper,package}/}CMakeLists.txt
|
||||
|
||||
%build
|
||||
|
||||
%cmake %{cmake_generator} %{cmake_config} \
|
||||
-DAMDGPU_TARGETS=%{gpu_list} \
|
||||
|
||||
%cmake_build
|
||||
|
||||
%install
|
||||
|
||||
%cmake_install
|
||||
|
||||
rm -f %{buildroot}%{pkg_prefix}/share/doc/rocrand/LICENSE.md
|
||||
|
||||
%files -n %{rocrand_name}
|
||||
%files
|
||||
%doc README.md
|
||||
%license LICENSE.md
|
||||
%{_libdir}/librocrand.so.1{,.*}
|
||||
|
||||
%if %{with debug}
|
||||
%{pkg_prefix}/%{pkg_libdir}/librocrand-d.so.1{,.*}
|
||||
%else
|
||||
%{pkg_prefix}/%{pkg_libdir}/librocrand.so.1{,.*}
|
||||
%endif
|
||||
|
||||
%files devel
|
||||
%{pkg_prefix}/include/rocrand/
|
||||
%{pkg_prefix}/%{pkg_libdir}/cmake/rocrand/
|
||||
%if %{with debug}
|
||||
%{pkg_prefix}/%{pkg_libdir}/librocrand-d.so
|
||||
%else
|
||||
%{pkg_prefix}/%{pkg_libdir}/librocrand.so
|
||||
%endif
|
||||
%files devel
|
||||
%{_includedir}/rocrand/
|
||||
%{_libdir}/cmake/rocrand/
|
||||
%{_libdir}/librocrand.so
|
||||
|
||||
%if %{with test}
|
||||
%files test
|
||||
%{pkg_prefix}/bin/test_*
|
||||
%{pkg_prefix}/bin/rocRAND/
|
||||
%{_bindir}/rocRAND/
|
||||
%{_bindir}/test_*
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Mon Jan 26 2026 Yifan Xu <xuyifan@iscas.ac.cn> - 7.1.1-1
|
||||
- Import from upstream
|
||||
%autochangelog
|
||||
|
||||
@@ -1,49 +0,0 @@
|
||||
From 22d2be00dc2289037144abeffab7a5526a8014ea Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Thu, 30 Oct 2025 11:27:03 -0700
|
||||
Subject: [PATCH] rocsolver ninja job pools
|
||||
|
||||
---
|
||||
CMakeLists.txt | 26 ++++++++++++++++++++++++++
|
||||
1 file changed, 26 insertions(+)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 32757570f70f..003b37f98fc5 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -125,6 +125,32 @@ option(BUILD_SHARED_LIBS "Build rocSOLVER as a shared library" ON)
|
||||
include(util)
|
||||
include(CheckLanguage)
|
||||
include(CMakeDependentOption)
|
||||
+#
|
||||
+# Seperate linking jobs from compiling
|
||||
+# Too many concurrent linking jobs can break the build
|
||||
+# Copied from LLVM
|
||||
+set(ROCSOLVER_PARALLEL_LINK_JOBS "" CACHE STRING
|
||||
+ "Define the maximum number of concurrent link jobs (Ninja only).")
|
||||
+if(CMAKE_GENERATOR MATCHES "Ninja")
|
||||
+ if(ROCSOLVER_PARALLEL_LINK_JOBS)
|
||||
+ set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${ROCSOLVER_PARALLEL_LINK_JOBS})
|
||||
+ set(CMAKE_JOB_POOL_LINK link_job_pool)
|
||||
+ endif()
|
||||
+elseif(ROCSOLVER_PARALLEL_LINK_JOBS)
|
||||
+ message(WARNING "Job pooling is only available with Ninja generators.")
|
||||
+endif()
|
||||
+# Similar for compiling
|
||||
+set(ROCSOLVER_PARALLEL_COMPILE_JOBS "" CACHE STRING
|
||||
+ "Define the maximum number of concurrent compile jobs (Ninja only).")
|
||||
+if(CMAKE_GENERATOR MATCHES "Ninja")
|
||||
+ if(ROCSOLVER_PARALLEL_COMPILE_JOBS)
|
||||
+ set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${ROCSOLVER_PARALLEL_COMPILE_JOBS})
|
||||
+ set(CMAKE_JOB_POOL_COMPILE compile_job_pool)
|
||||
+ endif()
|
||||
+elseif(ROCSOLVER_PARALLEL_COMPILE_JOBS)
|
||||
+ message(WARNING "Job pooling is only available with Ninja generators.")
|
||||
+endif()
|
||||
+
|
||||
include(CheckCXXCompilerFlag)
|
||||
|
||||
option(BUILD_TESTING "Build rocSOLVER tests" OFF)
|
||||
--
|
||||
2.51.0
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
From 10affbe2ed6ad66b8a3940bc077161d71d8a8d54 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Rix <Tom.Rix@amd.com>
|
||||
Date: Thu, 30 Oct 2025 11:38:27 -0700
|
||||
Subject: [PATCH] rocsolver parallel jobs
|
||||
|
||||
---
|
||||
CMakeLists.txt | 3 +++
|
||||
library/src/CMakeLists.txt | 4 ++++
|
||||
2 files changed, 7 insertions(+)
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index 003b37f98fc5..1f93a519e537 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -177,6 +177,9 @@ option(WERROR "Treat warnings as errors" OFF)
|
||||
option(BUILD_COMPRESSED_DBG "Enable compressed debug symbols" ON)
|
||||
check_cxx_compiler_flag("--offload-compress" CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS)
|
||||
cmake_dependent_option(BUILD_OFFLOAD_COMPRESS "Build with offload compression" ON CXX_COMPILER_SUPPORTS_OFFLOAD_COMPRESS OFF)
|
||||
+check_cxx_compiler_flag("-parallel-jobs=4" CXX_COMPILER_SUPPORTS_PARALLEL_HIP_JOBS)
|
||||
+cmake_dependent_option(BUILD_PARALLEL_HIP_JOBS "Build with parallel hip jobs" ON CXX_COMPILER_SUPPORTS_PARALLEL_HIP_JOBS OFF)
|
||||
+
|
||||
|
||||
message(STATUS "Tests: ${BUILD_CLIENTS_TESTS}")
|
||||
message(STATUS "Benchmarks: ${BUILD_CLIENTS_BENCHMARKS}")
|
||||
diff --git a/library/src/CMakeLists.txt b/library/src/CMakeLists.txt
|
||||
index b39646ee0f1d..7c5cc98b19ba 100755
|
||||
--- a/library/src/CMakeLists.txt
|
||||
+++ b/library/src/CMakeLists.txt
|
||||
@@ -448,6 +448,10 @@ if(BUILD_OFFLOAD_COMPRESS)
|
||||
target_compile_options(rocsolver PRIVATE "--offload-compress")
|
||||
endif()
|
||||
|
||||
+if(BUILD_PARALLEL_HIP_JOBS)
|
||||
+ target_compile_options(rocsolver PRIVATE "-parallel-jobs=4")
|
||||
+endif()
|
||||
+
|
||||
target_include_directories(rocsolver
|
||||
PUBLIC
|
||||
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/library/include>
|
||||
--
|
||||
2.51.0
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%global rocm_version 7.1.1
|
||||
|
||||
# consume too much time
|
||||
%bcond test 0
|
||||
%bcond sample 0
|
||||
%bcond benchmark 0
|
||||
|
||||
Name: rocsolver
|
||||
Version: %{rocm_version}
|
||||
Release: %autorelease
|
||||
Summary: Next generation LAPACK implementation for ROCm platform
|
||||
License: BSD-3-Clause AND BSD-2-Clause
|
||||
Url: https://github.com/ROCm/rocSOLVER
|
||||
#!RemoteAsset
|
||||
Source0: %{url}/archive/rocm-%{rocm_version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
BuildOption(conf): -G Ninja
|
||||
BuildOption(conf): -DCMAKE_CXX_COMPILER=hipcc
|
||||
BuildOption(conf): -DCMAKE_C_COMPILER=clang
|
||||
BuildOption(conf): -DCMAKE_AR=%rocmllvm_bindir/llvm-ar
|
||||
BuildOption(conf): -DCMAKE_RANLIB=%rocmllvm_bindir/llvm-ranlib
|
||||
BuildOption(conf): -DCMAKE_PREFIX_PATH=%{rocmllvm_cmakedir}/..
|
||||
BuildOption(conf): -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF
|
||||
BuildOption(conf): -DROCM_SYMLINK_LIBS=OFF
|
||||
BuildOption(conf): -DHIP_PLATFORM=amd
|
||||
BuildOption(conf): -DAMDGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
BuildOption(conf): -DBUILD_OFFLOAD_COMPRESS=ON
|
||||
BuildOption(conf): -DBUILD_PARALLEL_HIP_JOBS=ON
|
||||
BuildOption(conf): -DBUILD_CLIENTS_TESTS=%{?with_test:ON}%{!?with_test:OFF}
|
||||
BuildOption(conf): -DBUILD_CLIENTS_BENCHMARKS=%{?with_benchmark:ON}%{!?with_benchmark:OFF}
|
||||
BuildOption(conf): -DBUILD_CLIENTS_SAMPLES=%{?with_sample:ON}%{!?with_sample:OFF}
|
||||
|
||||
# https://github.com/ROCm/rocSOLVER/pull/652
|
||||
Patch0: 0001-rocsolver-ninja-job-pools.patch
|
||||
# https://github.com/ROCm/rocSOLVER/pull/962
|
||||
Patch1: 0001-rocsolver-parallel-jobs.patch
|
||||
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: cmake
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(fmt)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(rocblas)
|
||||
BuildRequires: cmake(rocprim)
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: ninja
|
||||
BuildRequires: pkgconfig(libzstd)
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-llvm-macros
|
||||
BuildRequires: rocsparse-devel
|
||||
BuildRequires: rocminfo
|
||||
|
||||
Provides: rocsolver = %{version}-%{release}
|
||||
|
||||
%description
|
||||
rocSOLVER is a work-in-progress implementation of a subset
|
||||
of LAPACK functionality on the ROCm platform.
|
||||
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
%{summary}
|
||||
|
||||
%if %{with test}
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%install -a
|
||||
rm -f %{buildroot}%{_prefix}/share/doc/rocsolver/LICENSE.md
|
||||
|
||||
%files
|
||||
%license LICENSE.md
|
||||
%doc README.md
|
||||
%{_libdir}/librocsolver.so.0{,.*}
|
||||
|
||||
%files devel
|
||||
%{_includedir}/rocsolver/
|
||||
%{_libdir}/librocsolver.so
|
||||
%{_libdir}/cmake/rocsolver/
|
||||
|
||||
%if %{with test}
|
||||
%files test
|
||||
%{_datadir}/rocsolver/
|
||||
%{_bindir}/rocsolver*
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
%{?autochangelog}
|
||||
@@ -1,134 +0,0 @@
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%bcond test 0
|
||||
|
||||
%global rocm_version 7.1.1
|
||||
|
||||
Name: rocsparse
|
||||
Version: %{rocm_version}
|
||||
Release: %autorelease
|
||||
Summary: SPARSE implementation for ROCm
|
||||
License: MIT
|
||||
Url: https://github.com/ROCm/rocSPARSE
|
||||
#!RemoteAsset
|
||||
Source0: %{url}/archive/rocm-%{rocm_version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
BuildOption(conf): -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF
|
||||
BuildOption(conf): -DBUILD_WITH_OFFLOAD_COMPRESS=ON
|
||||
BuildOption(conf): -DCMAKE_CXX_COMPILER=hipcc
|
||||
BuildOption(conf): -DCMAKE_C_COMPILER=clang
|
||||
BuildOption(conf): -DCMAKE_LINKER=%rocmllvm_bindir/ld.lld
|
||||
BuildOption(conf): -DCMAKE_AR=%rocmllvm_bindir/llvm-ar
|
||||
BuildOption(conf): -DCMAKE_RANLIB=%rocmllvm_bindir/llvm-ranlib
|
||||
BuildOption(conf): -DCMAKE_PREFIX_PATH=%{rocmllvm_cmakedir}/..
|
||||
BuildOption(conf): -DHIP_PLATFORM=amd
|
||||
BuildOption(conf): -DROCM_SYMLINK_LIBS=OFF
|
||||
BuildOption(conf): -DBUILD_CLIENTS_BENCHMARKS=%{?with_test:ON}%{!?with_test:OFF}
|
||||
BuildOption(conf): -DBUILD_CLIENTS_TESTS=%{?with_test:ON}%{!?with_test:OFF}
|
||||
BuildOption(conf): -DBUILD_CLIENTS_TESTS_OPENMP=OFF
|
||||
BuildOption(conf): -DBUILD_FORTRAN_CLIENTS=OFF
|
||||
BuildOption(conf): -DGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
BuildOption(conf): -G Ninja
|
||||
%if %{with test}
|
||||
BuildOption(conf): -DCMAKE_MATRICES_DIR=%{_builddir}/rocsparse-test-matrices/
|
||||
%endif
|
||||
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: cmake
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: cmake(rocprim)
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: ninja
|
||||
BuildRequires: pkgconfig(libzstd)
|
||||
BuildRequires: python3
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-llvm-macros
|
||||
BuildRequires: rocminfo
|
||||
%if %{with test}
|
||||
BuildRequires: cmake(GTest)
|
||||
BuildRequires: cmake(rocblas)
|
||||
BuildRequires: cmake(openmp)
|
||||
BuildRequires: gcc-gfortran
|
||||
BuildRequires: python3dist(pyyaml)
|
||||
%endif
|
||||
|
||||
Provides: %{name} = %{version}-%{release}
|
||||
|
||||
%description
|
||||
rocSPARSE exposes a common interface that provides Basic
|
||||
Linear Algebra Subroutines for sparse computation
|
||||
implemented on top of AMD's Radeon Open eCosystem Platform
|
||||
ROCm runtime and toolchains. rocSPARSE is created using
|
||||
the HIP programming language and optimized for AMD's
|
||||
latest discrete GPUs.
|
||||
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
%{summary}
|
||||
|
||||
%if %{with test}
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%prep -a
|
||||
# /usr/include/gtest/internal/gtest-port.h:273:2: error: C++ versions less than C++17 are not supported.
|
||||
# Convert the c++14 to c++17
|
||||
sed -i -e 's@set(CMAKE_CXX_STANDARD 14)@set(CMAKE_CXX_STANDARD 17)@' {,clients/}CMakeLists.txt
|
||||
|
||||
|
||||
%install -a
|
||||
rm -f %{buildroot}%{_prefix}/share/doc/rocsparse/LICENSE.md
|
||||
|
||||
%if %{with test}
|
||||
mkdir -p %{buildroot}/%{_datadir}/rocsparse/matrices
|
||||
install -pm 644 %{_builddir}/rocsparse-test-matrices/* %{buildroot}/%{_datadir}/rocsparse/matrices
|
||||
%endif
|
||||
|
||||
%check
|
||||
%if %{with test}
|
||||
export LD_LIBRARY_PATH=%{_vpath_builddir}/library:$LD_LIBRARY_PATH
|
||||
%{_vpath_builddir}/clients/staging/rocsparse-test
|
||||
%endif
|
||||
|
||||
%files
|
||||
%doc README.md
|
||||
%license LICENSE.md
|
||||
%{_libdir}/librocsparse.so.1{,.*}
|
||||
|
||||
%files devel
|
||||
%{_includedir}/rocsparse/
|
||||
%{_libdir}/librocsparse.so
|
||||
%{_libdir}/cmake/rocsparse/
|
||||
|
||||
%if %{with test}
|
||||
%files test
|
||||
%{_bindir}/rocsparse*
|
||||
%{_datadir}/rocsparse/test/rocsparse_*
|
||||
%{_datadir}/rocsparse/
|
||||
%{_libdir}/rocsparse/
|
||||
%{_libexecdir}/rocsparse/
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
%{?autochangelog}
|
||||
+70
-124
@@ -1,162 +1,108 @@
|
||||
%global upstreamname rocthrust
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
%bcond compat 0
|
||||
%if %{with compat}
|
||||
%global pkg_libdir lib
|
||||
%global pkg_prefix %{_prefix}/lib64/rocm/rocm-%{rocm_release}
|
||||
%global pkg_suffix -%{rocm_release}
|
||||
%global pkg_module rocm%{pkg_suffix}
|
||||
%else
|
||||
%global pkg_libdir %{_lib}
|
||||
%global pkg_prefix %{_prefix}
|
||||
%global pkg_suffix %{nil}
|
||||
%global pkg_module default
|
||||
%endif
|
||||
|
||||
# Compiler is hipcc, which is clang based:
|
||||
%global toolchain rocm
|
||||
# hipcc does not support some clang flags
|
||||
%global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host -fcf-protection/' -e 's/-mtls-dialect=gnu2//')
|
||||
# there is no debug package
|
||||
%global debug_package %{nil}
|
||||
|
||||
# Option to test suite for testing on real HW:
|
||||
%bcond check 0
|
||||
%if %{with check}
|
||||
# rocThrust needs a GPU to run tests, but we could still
|
||||
# keep the test cases for packagers who have a GPU, so make it optional.
|
||||
%bcond test 0
|
||||
%if %{with test}
|
||||
%global build_test ON
|
||||
%else
|
||||
%global build_test OFF
|
||||
%endif
|
||||
|
||||
# Compression type and level for source/binary package payloads.
|
||||
# "w7T0.xzdio" xz level 7 using %%{getncpus} threads
|
||||
# Threaded compression reduces the build time.
|
||||
%global _source_payload w7T0.xzdio
|
||||
%global _binary_payload w7T0.xzdio
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
Name: rocthrust%{pkg_suffix}
|
||||
# rocm builds with clang
|
||||
%global toolchain clang
|
||||
|
||||
Name: rocthrust
|
||||
Version: %{rocm_version}
|
||||
Release: 1%{?dist}
|
||||
Summary: ROCm Thrust libary
|
||||
Release: %autorelease
|
||||
Summary: ROCm Thrust library
|
||||
|
||||
License: Apache-2.0 AND BSD-2-Clause AND BSD-3-Clause AND BSL-1.0 AND MIT AND LicenseRef-Fedora-Public-Domain
|
||||
Url: https://github.com/ROCm/rocThrust
|
||||
VCS: git:https://github.com/ROCm/rocThrust.git
|
||||
License: Apache-2.0 AND BSD-2-Clause AND BSD-3-Clause AND BSL-1.0 AND MIT
|
||||
# All files are Apache 2.0 with some exceptions:
|
||||
# ./cmake contains only files under MIT
|
||||
# ./internal/benchmark/*.py are dual licensed Apache 2.0 and Boost 1.0
|
||||
# ./thrust/ contain some headers files that are Boost 1.0 licensed
|
||||
# ./thrust/ contain some header files that are Boost 1.0 licensed
|
||||
# ./thrust/ contain some headers that are dual Apache 2.0 and Boost 1.0
|
||||
# ./thrust/cmake/FindTBB.cmake is public domain
|
||||
# ./thrust/detail/allocator/allocator_traits.h is dual Apache 2.0 and MIT
|
||||
# ./thrust/detail/complex contains BSD 2 clause licensed headers
|
||||
#!RemoteAsset: sha256:995f9498402f207d04aac1edeb845abea295f6f132151ae1e04a6f0d0dc5edf5
|
||||
Source: %{url}/archive/rocm-%{version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
URL: https://github.com/ROCm/rocm-libraries
|
||||
Source0: %{url}/releases/download/rocm-%{version}/%{upstreamname}.tar.gz#/%{upstreamname}-%{version}.tar.gz
|
||||
BuildOption(conf): -G Ninja
|
||||
BuildOption(conf): -DAMDGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
BuildOption(conf): -DBUILD_TEST=%{build_test}
|
||||
|
||||
BuildRequires: llvm
|
||||
BuildRequires: llvm-devel
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-devel
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: clang-tools-extra-devel
|
||||
BuildRequires: lld
|
||||
BuildRequires: lld-devel
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: rocm-device-libs
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: rocm-cmake%{pkg_suffix}
|
||||
BuildRequires: rocm-comgr%{pkg_suffix}-devel
|
||||
BuildRequires: rocm-llvm%{pkg_suffix}-macros
|
||||
BuildRequires: rocm-hip%{pkg_suffix}-devel
|
||||
BuildRequires: rocprim%{pkg_suffix}-static
|
||||
BuildRequires: rocr-runtime%{pkg_suffix}-devel
|
||||
#BuildRequires: rocm-rpm-macros%{pkg_suffix}
|
||||
|
||||
%if %{with check}
|
||||
BuildRequires: gtest-devel
|
||||
BuildRequires: rocminfo
|
||||
%if %{with test}
|
||||
BuildRequires: cmake(GTest)
|
||||
%endif
|
||||
|
||||
ExclusiveArch: x86_64 riscv64
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(rocprim)
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: ninja
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-device-libs
|
||||
BuildRequires: rocm-llvm-macros
|
||||
|
||||
%description
|
||||
Thrust is a parallel algorithm library. This library has been
|
||||
ported to HIP/ROCm platform, which uses the rocPRIM library.
|
||||
|
||||
%package devel
|
||||
Summary: The %{upstreamname} development package
|
||||
Provides: %{name}-static = %{version}-%{release}
|
||||
%package devel
|
||||
Summary: Libraries and headers for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
The %{upstreamname} development package.
|
||||
%description devel
|
||||
%{summary}
|
||||
|
||||
%prep
|
||||
%if %{with gitcommit}
|
||||
%setup -q -n rocm-libraries-%{commit0}
|
||||
cd projects/rocthrust
|
||||
%else
|
||||
%autosetup -n %{upstreamname} -p1
|
||||
%if %{with test}
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
#
|
||||
# The ROCMExportTargetsHeaderOnly.cmake file
|
||||
# generates a files that reference the install location of other files
|
||||
# Make this change so they match
|
||||
sed -i -e 's/ROCM_INSTALL_LIBDIR lib/ROCM_INSTALL_LIBDIR %{pkg_libdir}/' cmake/ROCMExportTargetsHeaderOnly.cmake
|
||||
%prep -a
|
||||
# ROCMExportTargetsHeaderOnly.cmake hardcodes 'lib' as the library directory.
|
||||
# Change it to the correct platform-specific library directory.
|
||||
sed -i -e 's/ROCM_INSTALL_LIBDIR lib/ROCM_INSTALL_LIBDIR %{_lib}/' cmake/ROCMExportTargetsHeaderOnly.cmake
|
||||
|
||||
%build
|
||||
%if %{with gitcommit}
|
||||
cd projects/rocthrust
|
||||
%endif
|
||||
%install -a
|
||||
rm -f %{buildroot}%{_docdir}/rocthrust/LICENSE
|
||||
|
||||
|
||||
%if %{with check}
|
||||
# Building all the gpu's does not make sense
|
||||
# Build only the first one, this only works well with rpmbuild.
|
||||
gpu=`rocm_agent_enumerator | head -n 1`
|
||||
%endif
|
||||
|
||||
%cmake \
|
||||
-DCMAKE_C_COMPILER=%rocmllvm_bindir/clang \
|
||||
-DCMAKE_CXX_COMPILER=%rocmllvm_bindir/clang++ \
|
||||
-DCMAKE_INSTALL_LIBDIR=%{pkg_libdir} \
|
||||
-DCMAKE_INSTALL_PREFIX=%{pkg_prefix} \
|
||||
-DCMAKE_LINKER=%rocmllvm_bindir/ld.lld \
|
||||
-DCMAKE_AR=%rocmllvm_bindir/llvm-ar \
|
||||
-DCMAKE_RANLIB=%rocmllvm_bindir/llvm-ranlib \
|
||||
-DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
|
||||
-DBUILD_TEST=%{build_test} \
|
||||
%if %{with check}
|
||||
-DAMDGPU_TARGETS=${gpu} \
|
||||
%endif
|
||||
-DCMAKE_PREFIX_PATH=%{rocmllvm_cmakedir}/.. \
|
||||
-DROCM_SYMLINK_LIBS=OFF
|
||||
|
||||
%cmake_build
|
||||
|
||||
%install
|
||||
%cmake_install
|
||||
|
||||
# Extra license
|
||||
rm -f %{buildroot}%{pkg_prefix}/share/doc/rocthrust/LICENSE
|
||||
|
||||
%check
|
||||
%if %{with check}
|
||||
%ctest
|
||||
%endif
|
||||
|
||||
%files devel
|
||||
%files
|
||||
%doc README.md
|
||||
%license LICENSE
|
||||
%license NOTICES.txt
|
||||
%{pkg_prefix}/include/thrust
|
||||
%{pkg_prefix}/%{pkg_libdir}/cmake/rocthrust/
|
||||
|
||||
%files devel
|
||||
%{_includedir}/thrust/
|
||||
%{_libdir}/cmake/rocthrust/
|
||||
|
||||
%if %{with test}
|
||||
%files test
|
||||
%{_bindir}/test_*
|
||||
%{_bindir}/rocthrust/
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Mon Jan 26 2026 Yifan Xu <xuyifan@iscas.ac.cn> - 7.1.1-1
|
||||
- Import from upstream
|
||||
%autochangelog
|
||||
|
||||
+61
-172
@@ -1,217 +1,106 @@
|
||||
%global upstreamname roctracer
|
||||
# SPDX-FileCopyrightText: (C) 2026 Institute of Software, Chinese Academy of Sciences (ISCAS)
|
||||
# SPDX-FileCopyrightText: (C) 2026 openRuyi Project Contributors
|
||||
# SPDX-FileContributor: CHEN Xuan <chenxuan@iscas.ac.cn>
|
||||
# SPDX-FileContributor: Yifan Xu <xuyifan@iscas.ac.cn>
|
||||
#
|
||||
# SPDX-License-Identifier: MulanPSL-2.0
|
||||
|
||||
# roctracer needs a GPU to run tests, but we could still
|
||||
# keep the test cases for packagers who have a GPU, so make it optional.
|
||||
%bcond test 0
|
||||
%if %{with test}
|
||||
%global build_test ON
|
||||
%else
|
||||
%global build_test OFF
|
||||
%endif
|
||||
|
||||
%global rocm_release 7.1
|
||||
%global rocm_patch 1
|
||||
%global rocm_version %{rocm_release}.%{rocm_patch}
|
||||
|
||||
%bcond_with compat
|
||||
%if %{with compat}
|
||||
%global pkg_libdir lib
|
||||
%global pkg_prefix %{_prefix}/lib64/rocm/rocm-%{rocm_release}
|
||||
%global pkg_suffix -%{rocm_release}
|
||||
%global pkg_module rocm%{pkg_suffix}
|
||||
%else
|
||||
%global pkg_libdir %{_lib}
|
||||
%global pkg_prefix %{_prefix}
|
||||
%global pkg_suffix %{nil}
|
||||
%global pkg_module default
|
||||
%endif
|
||||
|
||||
# rocm stack builds with clang
|
||||
%global toolchain clang
|
||||
# hipcc does not support some clang flags
|
||||
%global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host -fcf-protection/')
|
||||
|
||||
# Needs ROCm HW and is only suitable for local testing
|
||||
# GPU_TARGETS in the cmake config are only for testing
|
||||
%bcond test 0
|
||||
|
||||
%bcond doc 0
|
||||
|
||||
%bcond debug 0
|
||||
%if %{with debug}
|
||||
%global build_type DEBUG
|
||||
%else
|
||||
%global build_type RelWithDebInfo
|
||||
%endif
|
||||
|
||||
# Compression type and level for source/binary package payloads.
|
||||
# "w7T0.xzdio" xz level 7 using %%{getncpus} threads
|
||||
%define _source_payload w7T0.xzdio
|
||||
%define _binary_payload w7T0.xzdio
|
||||
|
||||
Name: roctracer%{pkg_suffix}
|
||||
Name: roctracer
|
||||
Version: %{rocm_version}
|
||||
Release: 1%{?dist}
|
||||
Summary: ROCm Tracer Callback/Activity Library for Performance tracing AMD GPUs
|
||||
|
||||
Url: https://github.com/ROCm/%{upstreamname}
|
||||
Release: %autorelease
|
||||
Summary: ROCm Tracer Callback/Activity Library
|
||||
Url: https://github.com/ROCm/roctracer
|
||||
VCS: git:https://github.com/ROCm/roctracer.git
|
||||
License: MIT
|
||||
Source0: %{url}/archive/rocm-%{rocm_version}.tar.gz#/%{upstreamname}-%{rocm_version}.tar.gz
|
||||
#!RemoteAsset: sha256:dec80803c6d2d684759172145177849efda65672645b95a2f2ad1a84335043bb
|
||||
Source: %{url}/archive/rocm-%{version}.tar.gz
|
||||
BuildSystem: cmake
|
||||
|
||||
BuildOption(conf): -G Ninja
|
||||
BuildOption(conf): -DGPU_TARGETS=%{rocm_gpu_list_default}
|
||||
|
||||
BuildRequires: llvm
|
||||
BuildRequires: llvm-devel
|
||||
BuildRequires: clang
|
||||
BuildRequires: clang-devel
|
||||
BuildRequires: clang-tools-extra
|
||||
BuildRequires: clang-tools-extra-devel
|
||||
BuildRequires: lld
|
||||
BuildRequires: lld-devel
|
||||
|
||||
BuildRequires: hipcc
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: rocm-device-libs
|
||||
|
||||
BuildRequires: cmake
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: rocm-cmake%{pkg_suffix}
|
||||
BuildRequires: rocm-comgr%{pkg_suffix}-devel
|
||||
BuildRequires: rocm-llvm%{pkg_suffix}-macros
|
||||
BuildRequires: rocm-hip%{pkg_suffix}-devel
|
||||
BuildRequires: rocr-runtime%{pkg_suffix}-devel
|
||||
#BuildRequires: rocm-rpm-macros%{pkg_suffix}
|
||||
|
||||
BuildRequires: libatomic1
|
||||
# https://github.com/ROCm/roctracer/issues/113
|
||||
BuildRequires: cmake(amd_comgr)
|
||||
BuildRequires: cmake(hip)
|
||||
BuildRequires: cmake(hsa-runtime64)
|
||||
BuildRequires: compiler-rt
|
||||
BuildRequires: lld
|
||||
BuildRequires: llvm
|
||||
BuildRequires: ninja
|
||||
BuildRequires: pkgconfig(atomic_ops)
|
||||
BuildRequires: python3dist(cppheaderparser)
|
||||
|
||||
%if %{with doc}
|
||||
BuildRequires: doxygen
|
||||
BuildRequires: texlive-adjustbox
|
||||
BuildRequires: texlive-dvips
|
||||
BuildRequires: texlive-ec
|
||||
BuildRequires: texlive-hanging
|
||||
BuildRequires: texlive-latex
|
||||
BuildRequires: texlive-makeindex
|
||||
BuildRequires: texlive-metafont
|
||||
BuildRequires: texlive-multirow
|
||||
BuildRequires: texlive-newunicodechar
|
||||
BuildRequires: texlive-stackengine
|
||||
BuildRequires: texlive-texlive-scripts
|
||||
BuildRequires: texlive-tocloft
|
||||
BuildRequires: texlive-ulem
|
||||
BuildRequires: texlive-url
|
||||
BuildRequires: texlive-wasy
|
||||
BuildRequires: texlive-wasysym
|
||||
%endif
|
||||
|
||||
ExclusiveArch: x86_64 riscv64
|
||||
BuildRequires: rocm-cmake
|
||||
BuildRequires: rocm-device-libs
|
||||
BuildRequires: rocm-llvm-macros
|
||||
|
||||
%description
|
||||
ROC-tracer
|
||||
roctracer is a callback and activity tracing library for ROCm. It provides
|
||||
function call tracing for HIP and other ROCm runtimes, activity (asynchronous)
|
||||
tracing, and ROCTx user-defined event markers.
|
||||
|
||||
* ROC-tracer library: Runtimes Generic Callback/Activity APIs
|
||||
|
||||
The goal of the implementation is to provide a generic profiler, independent
|
||||
of any specific runtime, to trace API calls and asynchronous activity.
|
||||
|
||||
The API provides functionality for registering runtime API callbacks
|
||||
and supports pools of asynchronous activity records.
|
||||
|
||||
* ROC-TX library: Code Annotation Events API
|
||||
|
||||
Includes API for:
|
||||
|
||||
* roctxMark
|
||||
* roctxRangePush
|
||||
* roctxRangePop
|
||||
|
||||
%post -p /sbin/ldconfig
|
||||
%postun -p /sbin/ldconfig
|
||||
|
||||
%package devel
|
||||
Summary: The %{name} development package
|
||||
%package devel
|
||||
Summary: The roctracer development package
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description devel
|
||||
The headers of libraries for %{name}.
|
||||
|
||||
%if %{with doc}
|
||||
%package doc
|
||||
Summary: Docs for %{name}
|
||||
|
||||
%description doc
|
||||
%{summary}
|
||||
%endif
|
||||
%description devel
|
||||
The roctracer development package.
|
||||
|
||||
%if %{with test}
|
||||
%package test
|
||||
%package test
|
||||
Summary: Tests for %{name}
|
||||
Requires: %{name}%{?_isa} = %{version}-%{release}
|
||||
|
||||
%description test
|
||||
%description test
|
||||
%{summary}
|
||||
%endif
|
||||
|
||||
%prep
|
||||
%autosetup -p1 -n %{upstreamname}-rocm-%{version}
|
||||
|
||||
%prep -a
|
||||
# No knob in cmake to turn off testing
|
||||
%if %{without test}
|
||||
sed -i -e 's@add_subdirectory(test)@#add_subdirectory(test)@' CMakeLists.txt
|
||||
|
||||
%else
|
||||
|
||||
# Adjust test running script lib dir
|
||||
sed -i -e 's@../lib/@../%{pkg_libdir}/@' test/run.sh
|
||||
|
||||
sed -i -e 's@../lib/@../%{_lib}/@' test/run.sh
|
||||
%endif
|
||||
|
||||
%build
|
||||
# Configure with clang/clang++ from the rocmllvm_bindir macro and install under
# the pkg_prefix/pkg_libdir layout. Every option line ends in " \" (space, then
# backslash) so that line joining can never fuse two options into one argument.
%cmake \
|
||||
-DCMAKE_BUILD_TYPE=%{build_type} \
|
||||
-DCMAKE_C_COMPILER=%rocmllvm_bindir/clang \
|
||||
-DCMAKE_CXX_COMPILER=%rocmllvm_bindir/clang++ \
|
||||
-DCMAKE_CXX_FLAGS="-I%{pkg_prefix}/include" \
|
||||
-DCMAKE_EXE_LINKER_FLAGS="-L %{pkg_prefix}/%{pkg_libdir} -lamdhip64" \
|
||||
-DCMAKE_INSTALL_LIBDIR=%{pkg_libdir} \
|
||||
-DCMAKE_INSTALL_PREFIX=%{pkg_prefix} \
|
||||
-DCMAKE_MODULE_PATH=%{pkg_prefix}/%{pkg_libdir}/cmake/hip \
|
||||
-DCMAKE_PREFIX_PATH=%{rocmllvm_cmakedir}/.. \
|
||||
-DROCM_SYMLINK_LIBS=OFF \
|
||||
-DGPU_TARGETS=%{rocm_gpu_list_test} \
|
||||
-DHIP_PLATFORM=amd \
|
||||
-DHIP_HIPCC_FLAGS="-I%{pkg_prefix}/include -L%{pkg_prefix}/%{pkg_libdir} -lamdhip64" \
|
||||
-DBUILD_SHARED_LIBS=ON
|
||||
|
||||
%cmake_build
|
||||
|
||||
%if %{with doc}
|
||||
%cmake_build -t doc
|
||||
%endif
|
||||
|
||||
%install
|
||||
%cmake_install
|
||||
|
||||
# Only install the pdf: remove the share/html tree from the buildroot.
|
||||
rm -rf %{buildroot}%{pkg_prefix}/share/html
|
||||
# Extra licenses
|
||||
# Fedora
|
||||
rm -f %{buildroot}%{pkg_prefix}/share/doc/*/LICENSE.md
|
||||
# OpenSUSE
|
||||
rm -f %{buildroot}%{pkg_prefix}/share/doc/*/*/LICENSE.md
|
||||
|
||||
%install -a
|
||||
rm -f %{buildroot}%{_datadir}/doc/%{name}/LICENSE.md
|
||||
rm -rf %{buildroot}%{_datadir}/doc/%{name}-asan
|
||||
|
||||
%files
|
||||
%license LICENSE.md
|
||||
%doc README.md
|
||||
%{pkg_prefix}/%{pkg_libdir}/libroctracer64.so.*
|
||||
%{pkg_prefix}/%{pkg_libdir}/libroctx64.so.*
|
||||
%{pkg_prefix}/%{pkg_libdir}/roctracer/
|
||||
%{_libdir}/libroctracer64.so.*
|
||||
%{_libdir}/libroctx64.so.*
|
||||
%{_libdir}/roctracer/
|
||||
|
||||
%files devel
|
||||
%{pkg_prefix}/include/roctracer
|
||||
%{pkg_prefix}/%{pkg_libdir}/libroctracer64.so
|
||||
%{pkg_prefix}/%{pkg_libdir}/libroctx64.so
|
||||
|
||||
%if %{with doc}
|
||||
%files doc
|
||||
%{pkg_prefix}/share/doc/roctracer/
|
||||
%endif
|
||||
%{_includedir}/roctracer/
|
||||
%{_libdir}/libroctracer64.so
|
||||
%{_libdir}/libroctx64.so
|
||||
|
||||
%if %{with test}
|
||||
%files test
|
||||
%{pkg_prefix}/share/roctracer/
|
||||
%{_datadir}/roctracer/
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Mon Jan 26 2026 Yifan Xu <xuyifan@iscas.ac.cn> - 7.1.1-1
|
||||
- Import from upstream
|
||||
%autochangelog
|
||||
|
||||
Reference in New Issue
Block a user