| | import logging |
| | import os |
| | from shutil import which, move |
| | import subprocess |
| | import sys |
| | from pathlib import Path |
| |
|
| | from setuptools import Extension, find_packages, setup |
| | from setuptools.command.build_ext import build_ext |
| |
|
# Module-level logger; CMakeBuild reports the MAX_JOBS / NVCC_THREADS
# overrides it picks up from the environment through it.
logger = logging.getLogger(__name__)
| |
|
| |
|
def is_sccache_available() -> bool:
    """Return ``True`` when ``shutil.which`` can locate an ``sccache`` executable."""
    sccache_path = which("sccache")
    return sccache_path is not None
| |
|
| |
|
def is_ccache_available() -> bool:
    """Return ``True`` when ``shutil.which`` can locate a ``ccache`` executable."""
    ccache_path = which("ccache")
    return ccache_path is not None
| |
|
| |
|
def is_ninja_available() -> bool:
    """Return ``True`` when ``shutil.which`` can locate a ``ninja`` executable."""
    ninja_path = which("ninja")
    return ninja_path is not None
| |
|
| |
|
class CMakeExtension(Extension):
    """Extension entry that lists no sources and records a CMake source directory.

    Args:
        name: Dotted name of the extension module.
        sourcedir: Directory handed to CMake as the source tree. It is
            resolved to an absolute path at construction time; the default
            empty string resolves to the current working directory.
    """

    def __init__(self, name: str, sourcedir: str = "") -> None:
        # The source list is intentionally empty: nothing is compiled by
        # setuptools itself for this extension.
        super().__init__(name, sources=[], py_limited_api=True)
        resolved_source_dir = Path(sourcedir).resolve()
        self.sourcedir = str(resolved_source_dir)
| |
|
| |
|
class CMakeBuild(build_ext):
    """``build_ext`` command that configures and builds an extension with CMake.

    Environment variables read during the build:

    * ``DEBUG`` -- integer; non-zero selects a ``Debug`` build when the
      command's own ``debug`` option is ``None``.
    * ``CMAKE_GENERATOR`` -- when empty or ``Ninja``, the ``ninja`` Python
      package (if importable) is used to supply the Ninja executable.
    * ``CMAKE_ARGS`` -- extra space-separated arguments for the configure step.
    * ``MAX_JOBS`` -- number of parallel build jobs.
    * ``NVCC_THREADS`` -- forwarded to CMake as ``-DNVCC_THREADS``; the job
      count is divided by it.
    """

    @staticmethod
    def _compiler_launcher_args() -> list[str]:
        """Return CMake launcher flags for sccache (preferred) or ccache, if found."""
        if is_sccache_available():
            launcher = "sccache"
        elif is_ccache_available():
            launcher = "ccache"
        else:
            return []
        return [
            f"-DCMAKE_{language}_COMPILER_LAUNCHER={launcher}"
            for language in ("C", "CXX", "CUDA", "HIP")
        ]

    @staticmethod
    def _resolve_job_counts() -> tuple[int, int]:
        """Return ``(num_jobs, nvcc_threads)`` derived from the environment."""
        max_jobs = os.getenv("MAX_JOBS")
        if max_jobs is not None:
            num_jobs = int(max_jobs)
            logger.info("Using MAX_JOBS=%d as the number of jobs.", num_jobs)
        else:
            try:
                num_jobs = len(os.sched_getaffinity(0))
            except AttributeError:
                # os.sched_getaffinity() does not exist on every platform, and
                # os.cpu_count() may return None, so fall back to one job.
                num_jobs = os.cpu_count() or 1

        nvcc_setting = os.getenv("NVCC_THREADS")
        if nvcc_setting is not None:
            nvcc_threads = int(nvcc_setting)
            logger.info(
                "Using NVCC_THREADS=%d as the number of nvcc threads.", nvcc_threads
            )
        else:
            nvcc_threads = 1

        # NVCC_THREADS=0 must not divide by zero; treat it as "do not split".
        num_jobs = max(1, num_jobs // (nvcc_threads or 1))
        return num_jobs, nvcc_threads

    def build_extension(self, ext: CMakeExtension) -> None:
        """Run the CMake configure and build steps for ``ext``.

        Args:
            ext: Extension whose ``sourcedir`` is passed to CMake as the
                source tree and whose ``name`` determines the output location.

        Raises:
            subprocess.CalledProcessError: If either CMake invocation exits
                with a non-zero status.
            ValueError: If ``DEBUG``, ``MAX_JOBS`` or ``NVCC_THREADS`` is set
                to something ``int()`` cannot parse.
        """
        ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name)
        extdir = ext_fullpath.parent.resolve()

        debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug
        cfg = "Debug" if debug else "Release"

        cmake_generator = os.environ.get("CMAKE_GENERATOR", "")

        cmake_args = [
            f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}{os.sep}",
            f"-DPython_EXECUTABLE={sys.executable}",
            f"-DCMAKE_BUILD_TYPE={cfg}",
        ]
        build_args = []
        if "CMAKE_ARGS" in os.environ:
            cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item]

        if not cmake_generator or cmake_generator == "Ninja":
            try:
                import ninja
            except ImportError:
                # The ninja package is optional: without it no -G flag is
                # added here and the generator choice is left to CMake.
                pass
            else:
                ninja_executable_path = Path(ninja.BIN_DIR) / "ninja"
                cmake_args += [
                    "-GNinja",
                    f"-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_executable_path}",
                ]

        cmake_args += self._compiler_launcher_args()

        num_jobs, nvcc_threads = self._resolve_job_counts()

        build_args += [f"-j{num_jobs}"]
        if sys.platform == "win32":
            build_args += ["--config", cfg]

        if nvcc_threads:
            cmake_args += [f"-DNVCC_THREADS={nvcc_threads}"]

        build_temp = Path(self.build_temp) / ext.name
        build_temp.mkdir(parents=True, exist_ok=True)

        subprocess.run(
            ["cmake", ext.sourcedir, *cmake_args], cwd=build_temp, check=True
        )
        subprocess.run(
            ["cmake", "--build", ".", *build_args], cwd=build_temp, check=True
        )
        if sys.platform == "win32":
            # Multi-config generators place artifacts in a per-configuration
            # subdirectory; hoist them into extdir. Single-config generators
            # such as Ninja create no such directory, so skip when absent.
            config_dir = extdir / cfg
            if config_dir.is_dir():
                for filename in os.listdir(config_dir):
                    move(config_dir / filename, extdir / filename)
| |
|
| |
|
| |
|
setup(
    # Distribution metadata.
    name="gemm",
    version="0.1.0",
    python_requires=">=3.9",
    install_requires=["torch"],
    # Python packages are discovered under torch-ext/, limited to gemm*.
    package_dir={"": "torch-ext"},
    packages=find_packages(where="torch-ext", include=["gemm*"]),
    # The native module is produced by the CMake-driven build_ext command.
    cmdclass={"build_ext": CMakeBuild},
    ext_modules=[CMakeExtension("gemm._gemm_926a158_dirty")],
    zip_safe=False,
)