diff --git a/build.toml b/build.toml index 3872776019c41eb700c91e3bf907e8b71bd47b25..a011dc8ffc58cb3726db1bafb34e31e473be8bb2 100644 --- a/build.toml +++ b/build.toml @@ -9,3 +9,11 @@ src = ["torch-ext/torch_binding.cpp"] backend = "cuda" depends = ["torch"] src = ["rotary/rotary_cuda.cu"] + +[kernel.rotary_xpu] +backend = "xpu" +depends = ["torch"] +src = [ + "rotary-xpu/rotary_xpu.cpp", + "rotary-xpu/rotary_xpu.hpp", + ] \ No newline at end of file diff --git a/build/torch27-cxx11-cu118-x86_64-linux/rotary/__init__.py b/build/torch27-cxx11-cu118-x86_64-linux/rotary/__init__.py old mode 100644 new mode 100755 diff --git a/build/torch27-cxx11-cu118-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..3e8cb222776a2da8e112221fdbb99a414d622cf4 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu118-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..05bd2d15b6eca322d474442e211e7e4d63071f50 Binary files /dev/null and b/build/torch27-cxx11-cu118-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu118-x86_64-linux/rotary/_ops.py b/build/torch27-cxx11-cu118-x86_64-linux/rotary/_ops.py old mode 100644 new mode 100755 index 8d9717343c07cf81e45646b6fc80dddc95d58bdf..882a9998d9ad3e0226135704ffdc37207e41c499 --- a/build/torch27-cxx11-cu118-x86_64-linux/rotary/_ops.py +++ b/build/torch27-cxx11-cu118-x86_64-linux/rotary/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _rotary_6b8e81d -ops = torch.ops._rotary_6b8e81d +from . import _rotary_cd1413b_dirty +ops = torch.ops._rotary_cd1413b_dirty def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_rotary_6b8e81d::{op_name}" \ No newline at end of file + return f"_rotary_cd1413b_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu118-x86_64-linux/rotary/_rotary_6b8e81d.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/rotary/_rotary_6b8e81d.abi3.so deleted file mode 100755 index 94c6d44c7dba5b009caa43e9baffe81adedf970b..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu118-x86_64-linux/rotary/_rotary_6b8e81d.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e67a5779587b10616a0e19961ae50495c367f53da7c4a40a1c9b1f557537441d -size 6287712 diff --git a/build/torch27-cxx11-cu118-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..2f86c8063c7fe14d1750fa98cf6180d4fafddd3e --- /dev/null +++ b/build/torch27-cxx11-cu118-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51c8d8635b97b599a33ba169458b47b9276f673c678c413107a5cab5a835f90e +size 6807672 diff --git a/build/torch27-cxx11-cu126-x86_64-linux/rotary/__init__.py b/build/torch27-cxx11-cu126-x86_64-linux/rotary/__init__.py old mode 100644 new mode 100755 diff --git a/build/torch27-cxx11-cu126-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..dba38f5c4fda815a7a0c08a9be238e15b8b7dc82 Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu126-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..eca86af4da07beef6c418ed60c34ba89b676ab59 Binary files /dev/null and b/build/torch27-cxx11-cu126-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu126-x86_64-linux/rotary/_ops.py b/build/torch27-cxx11-cu126-x86_64-linux/rotary/_ops.py old mode 100644 new mode 100755 index 8d9717343c07cf81e45646b6fc80dddc95d58bdf..882a9998d9ad3e0226135704ffdc37207e41c499 --- a/build/torch27-cxx11-cu126-x86_64-linux/rotary/_ops.py +++ b/build/torch27-cxx11-cu126-x86_64-linux/rotary/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _rotary_6b8e81d -ops = torch.ops._rotary_6b8e81d +from . import _rotary_cd1413b_dirty +ops = torch.ops._rotary_cd1413b_dirty def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_rotary_6b8e81d::{op_name}" \ No newline at end of file + return f"_rotary_cd1413b_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu126-x86_64-linux/rotary/_rotary_6b8e81d.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/rotary/_rotary_6b8e81d.abi3.so deleted file mode 100755 index 66c3721a4eb80c7d99bb53766a1c66cb9b4303f6..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu126-x86_64-linux/rotary/_rotary_6b8e81d.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ced538b9a02abba45b7769dd09902cbc1816091ec50da2b65c0549f85974ea4c -size 6296464 diff --git a/build/torch27-cxx11-cu126-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..43f652168a6298256b697f26a9bd1e53108dd855 --- /dev/null +++ b/build/torch27-cxx11-cu126-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b49a2fb4c22c6cda6d4d28d1f5eb3ad84801174c7790628519f0c7529a57773 +size 6820520 diff --git a/build/torch27-cxx11-cu128-x86_64-linux/rotary/__init__.py b/build/torch27-cxx11-cu128-x86_64-linux/rotary/__init__.py old mode 100644 new mode 100755 diff --git a/build/torch27-cxx11-cu128-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..a6e3f79acb019fbf14ac6ffd999448170ef1b4d5 Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-cu128-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..ba5d3a9275df973397473625f68d37ff53f76eea Binary files /dev/null and b/build/torch27-cxx11-cu128-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-cu128-x86_64-linux/rotary/_ops.py b/build/torch27-cxx11-cu128-x86_64-linux/rotary/_ops.py old mode 100644 new mode 100755 index 8d9717343c07cf81e45646b6fc80dddc95d58bdf..882a9998d9ad3e0226135704ffdc37207e41c499 --- a/build/torch27-cxx11-cu128-x86_64-linux/rotary/_ops.py +++ b/build/torch27-cxx11-cu128-x86_64-linux/rotary/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _rotary_6b8e81d -ops = torch.ops._rotary_6b8e81d +from . import _rotary_cd1413b_dirty +ops = torch.ops._rotary_cd1413b_dirty def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_rotary_6b8e81d::{op_name}" \ No newline at end of file + return f"_rotary_cd1413b_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-cu128-x86_64-linux/rotary/_rotary_6b8e81d.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/rotary/_rotary_6b8e81d.abi3.so deleted file mode 100755 index e3be43836a3f46b11493b2a64eeb33e560b7bd9c..0000000000000000000000000000000000000000 --- a/build/torch27-cxx11-cu128-x86_64-linux/rotary/_rotary_6b8e81d.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e4a64022928f62fe0b476c54f5a426b856296fc2bba796c7e9fe6406c6d65485 -size 10157336 diff --git a/build/torch27-cxx11-cu128-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..2a50c157d413563fcba486b8104bea699d4f05e1 --- /dev/null +++ b/build/torch27-cxx11-cu128-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:638bbc069d927f9e37f1e720e73ee4af097ec16fc882f7abcc04dae2045b80a1 +size 10529832 diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__init__.py b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..eba8039e210c8b710c5c663ef4e7930757f271be --- /dev/null +++ b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__init__.py @@ -0,0 +1,19 @@ +from typing import Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +): + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +__all__ = ["apply_rotary"] diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..abd6c81dc9fecc7d53eb660acfea14d138cb3936 Binary files /dev/null and b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..5ffe3824a7c006aaad48f0acad988ac0ffdd0359 Binary files /dev/null and b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/_ops.py b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/_ops.py new file mode 100755 index 0000000000000000000000000000000000000000..882a9998d9ad3e0226135704ffdc37207e41c499 --- /dev/null +++ b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cd1413b_dirty +ops = torch.ops._rotary_cd1413b_dirty + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_cd1413b_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..a30dcc3d3d7f34c0bd8a5500a0c5b88d700664ff --- /dev/null +++ b/build/torch27-cxx11-xpu20250-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7f449e098ab5bbe9ca35e2a904132ed4e378e54d579aefe95e4e83e07a73bfe +size 2248696 diff --git a/build/torch28-cxx11-cu126-x86_64-linux/rotary/__init__.py b/build/torch28-cxx11-cu126-x86_64-linux/rotary/__init__.py old mode 100644 new mode 100755 diff --git a/build/torch28-cxx11-cu126-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu126-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..51fe290a3984f6fb962af5977b274a4f09bc5979 Binary files /dev/null and b/build/torch28-cxx11-cu126-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu126-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..b905458ddf063c5ee2a88c9d665ea0679237836a Binary files /dev/null and b/build/torch28-cxx11-cu126-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu126-x86_64-linux/rotary/_ops.py b/build/torch28-cxx11-cu126-x86_64-linux/rotary/_ops.py old mode 100644 new mode 100755 index dd4baa9daad5b83b5309896a2982c812d8c389b1..882a9998d9ad3e0226135704ffdc37207e41c499 --- a/build/torch28-cxx11-cu126-x86_64-linux/rotary/_ops.py +++ b/build/torch28-cxx11-cu126-x86_64-linux/rotary/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _rotary_d5e8892 -ops = torch.ops._rotary_d5e8892 +from . import _rotary_cd1413b_dirty +ops = torch.ops._rotary_cd1413b_dirty def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_rotary_d5e8892::{op_name}" \ No newline at end of file + return f"_rotary_cd1413b_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu126-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..71b6eaf4f9e54c8f1a01b2e2d01dc88382a8650e --- /dev/null +++ b/build/torch28-cxx11-cu126-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea610dc89fe7d0037e55ff171158e099f79a08fcd14a8aff117c7b090d79a6e2 +size 6817216 diff --git a/build/torch28-cxx11-cu126-x86_64-linux/rotary/_rotary_d5e8892.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/rotary/_rotary_d5e8892.abi3.so deleted file mode 100755 index 13479c760691c87133542d81e6dccf2a7cbf6bf8..0000000000000000000000000000000000000000 --- a/build/torch28-cxx11-cu126-x86_64-linux/rotary/_rotary_d5e8892.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:663ac605052413c175dcff4dec8545aa37ad67e8629002621741b89b182d98cd -size 6293168 diff --git a/build/torch28-cxx11-cu128-x86_64-linux/rotary/__init__.py b/build/torch28-cxx11-cu128-x86_64-linux/rotary/__init__.py old mode 100644 new mode 100755 diff --git a/build/torch28-cxx11-cu128-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu128-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..6710462bf948980a76edb3587ca3260eb7e93cb8 Binary files /dev/null and b/build/torch28-cxx11-cu128-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu128-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..932be89ecdd58624ee42ade87d921fe9beeacef0 Binary files /dev/null and b/build/torch28-cxx11-cu128-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu128-x86_64-linux/rotary/_ops.py b/build/torch28-cxx11-cu128-x86_64-linux/rotary/_ops.py old mode 100644 new mode 100755 index dd4baa9daad5b83b5309896a2982c812d8c389b1..882a9998d9ad3e0226135704ffdc37207e41c499 --- a/build/torch28-cxx11-cu128-x86_64-linux/rotary/_ops.py +++ b/build/torch28-cxx11-cu128-x86_64-linux/rotary/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _rotary_d5e8892 -ops = torch.ops._rotary_d5e8892 +from . import _rotary_cd1413b_dirty +ops = torch.ops._rotary_cd1413b_dirty def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_rotary_d5e8892::{op_name}" \ No newline at end of file + return f"_rotary_cd1413b_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu128-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..68f111fb1871eca79bbbb017cc4b6ea94c7e6165 --- /dev/null +++ b/build/torch28-cxx11-cu128-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f8af2b94a8121a7a8b6ac446cb6eb117d49cb4ea8842d7024bb1b9b26fb97db +size 10526424 diff --git a/build/torch28-cxx11-cu128-x86_64-linux/rotary/_rotary_d5e8892.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/rotary/_rotary_d5e8892.abi3.so deleted file mode 100755 index f3ada6a304ed16bf9a2af9af55ecdaa3b4b2eb95..0000000000000000000000000000000000000000 --- a/build/torch28-cxx11-cu128-x86_64-linux/rotary/_rotary_d5e8892.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:264410fa13bca33e706d1c3eb12a2d966e8fa07e2b786cbd8332d462f4883d1a -size 10149824 diff --git a/build/torch28-cxx11-cu129-x86_64-linux/rotary/__init__.py b/build/torch28-cxx11-cu129-x86_64-linux/rotary/__init__.py old mode 100644 new mode 100755 diff --git a/build/torch28-cxx11-cu129-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-cu129-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..3c0ebd8f6fd6ceaa1d5ebc8d7c8e875cda718787 Binary files /dev/null and b/build/torch28-cxx11-cu129-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-cu129-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..6ced4aee44fb96b785ecdb9bbfed7dfdebc7497d Binary files /dev/null and b/build/torch28-cxx11-cu129-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-cu129-x86_64-linux/rotary/_ops.py b/build/torch28-cxx11-cu129-x86_64-linux/rotary/_ops.py old mode 100644 new mode 100755 index dd4baa9daad5b83b5309896a2982c812d8c389b1..882a9998d9ad3e0226135704ffdc37207e41c499 --- a/build/torch28-cxx11-cu129-x86_64-linux/rotary/_ops.py +++ b/build/torch28-cxx11-cu129-x86_64-linux/rotary/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _rotary_d5e8892 -ops = torch.ops._rotary_d5e8892 +from . import _rotary_cd1413b_dirty +ops = torch.ops._rotary_cd1413b_dirty def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_rotary_d5e8892::{op_name}" \ No newline at end of file + return f"_rotary_cd1413b_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-cu129-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..17440b83122c4aa653435d73d771d12eac66b996 --- /dev/null +++ b/build/torch28-cxx11-cu129-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88098a942da850ef34bc5d4b2f810d9c3092718c134fba911161a04eba73c559 +size 10586840 diff --git a/build/torch28-cxx11-cu129-x86_64-linux/rotary/_rotary_d5e8892.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/rotary/_rotary_d5e8892.abi3.so deleted file mode 100755 index eec1aac04e680c4e7bbf7f8fd19ff12436015a81..0000000000000000000000000000000000000000 --- a/build/torch28-cxx11-cu129-x86_64-linux/rotary/_rotary_d5e8892.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dd62535c2713d05e74f4b53c84564caeeba51aaf06f5fe59a3182b04a5ae3c5a -size 10169280 diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__init__.py b/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..eba8039e210c8b710c5c663ef4e7930757f271be --- /dev/null +++ b/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__init__.py @@ -0,0 +1,19 @@ +from typing import Tuple +import torch + +from ._ops import ops + + +def apply_rotary( + x1: torch.Tensor, + x2: torch.Tensor, + cos: torch.Tensor, + sin: torch.Tensor, + out1: torch.Tensor, + out2: torch.Tensor, + conj: bool, +): + ops.apply_rotary(x1, x2, cos, sin, out1, out2, conj) + + +__all__ = ["apply_rotary"] diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__pycache__/__init__.cpython-311.pyc b/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54f458b69bed5dbd698dfa26fe00c0b02c914e4a Binary files /dev/null and b/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__pycache__/__init__.cpython-311.pyc differ diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc b/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..c3534f51269371cdca09df12b23130847cd60863 Binary files /dev/null and b/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__pycache__/__init__.cpython-313.pyc differ diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__pycache__/_ops.cpython-311.pyc b/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__pycache__/_ops.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c85210970cad2446f178ffdebe2715fb1925fd97 Binary files /dev/null and b/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__pycache__/_ops.cpython-311.pyc differ diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc b/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc new file mode 100755 index 0000000000000000000000000000000000000000..12f1525244249e7c96804944827577119a4c6e0c Binary files /dev/null and b/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/__pycache__/_ops.cpython-313.pyc differ diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/_ops.py b/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/_ops.py new file mode 100755 index 0000000000000000000000000000000000000000..882a9998d9ad3e0226135704ffdc37207e41c499 --- /dev/null +++ b/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _rotary_cd1413b_dirty +ops = torch.ops._rotary_cd1413b_dirty + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_rotary_cd1413b_dirty::{op_name}" \ No newline at end of file diff --git a/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so b/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so new file mode 100755 index 0000000000000000000000000000000000000000..6f020bf2e546d40af6f3d01c393c7911a2df7642 --- /dev/null +++ b/build/torch28-cxx11-xpu20251-x86_64-linux/rotary/_rotary_cd1413b_dirty.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12dba600201d5c2bd5cd123afc3b65f835f3698a89b75e61c85ee3f359f2e901 +size 2239816 diff --git a/flake.lock b/flake.lock index 26319b67eda604001ec12fc6bdd52ee9bfe65846..9a10662457740e93808381972c1b39c46faea045 100644 --- a/flake.lock +++ b/flake.lock @@ -17,11 +17,11 @@ }, "flake-compat_2": { "locked": { - "lastModified": 1733328505, - "narHash": "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=", + "lastModified": 1747046372, + "narHash": "sha256-CIVLLkVgvHYbgI2UpXvIIBJ12HWgX+fjA8Xf8PUmqCY=", "owner": "edolstra", "repo": "flake-compat", - "rev": "ff81ac966bb2cae68946d5ed5fc4994f96d0ffec", + "rev": "9100a0f413b0c601e0533d1d94ffd501ce2e7885", "type": "github" }, "original": { @@ -73,11 +73,11 @@ "nixpkgs": "nixpkgs" }, "locked": { - "lastModified": 1753354560, - "narHash": "sha256-vmOfRmr0Qm/IbZTWB2sBn+UFrABSTTA/cTg+m27Yt/E=", + "lastModified": 1757493151, + "narHash": "sha256-eirWlcvs2rjZmU8JcF4CKN1IEnNfpQnGuf2qbK3IQh8=", "owner": "huggingface", "repo": "hf-nix", - "rev": "7f2aceda2a2e72cd573bdb25e5c0667fd75f89d3", + "rev": "503cd4eb9866103c983dbef93d9ad5db4fb6b415", "type": "github" }, "original": { @@ -98,33 +98,32 @@ ] }, "locked": { - "lastModified": 1753354632, - "narHash": "sha256-31SX3Raiyx0qCuY9JSlx9ZZgxljeUxvW+JdujjxbofQ=", + "lastModified": 1757570810, + "narHash": "sha256-YFWQwy2LKbhjdLW8wkyNkE/+Vbdn6qlJif2CKvBT9Qo=", "owner": "huggingface", "repo": "kernel-builder", - "rev": "524b628fd8e58525dbd28455bffb0628092c5265", + "rev": "1201847af3ff757b65015c6e06b5bd75896d2d4b", "type": "github" }, "original": { "owner": "huggingface", - "ref": "torch-2.8", "repo": "kernel-builder", "type": "github" } }, "nixpkgs": { "locked": { - "lastModified": 1752785354, - "narHash": "sha256-Y33ryUz7MPqKrZwlbQcsYCUz2jAJCacRf8jbs0tYUlA=", + "lastModified": 1755963616, + "narHash": "sha256-6yD0ww/S8n+U2uPYcJZ3DRURP8Kx036GRpR2uPNZroE=", "owner": "nixos", "repo": "nixpkgs", - "rev": "d38025438a6ee456758dc03188ca6873a415463b", + "rev": "73e96df7cff5783f45e21342a75a1540c4eddce4", "type": "github" }, "original": { "owner": "nixos", + "ref": "nixos-unstable-small", "repo": "nixpkgs", - "rev": "d38025438a6ee456758dc03188ca6873a415463b", "type": "github" } }, diff --git a/flake.nix b/flake.nix index f655a849d1d070474437c8ee846db3dc4b0b6965..a4c72b81489bcc67dff0d5a2857d765a0a74c5b0 100644 --- a/flake.nix +++ b/flake.nix @@ -1,15 +1,9 @@ { - description = "Flake for rotary kernel"; - + description = "Flake for Torch kernel extension"; inputs = { - kernel-builder.url = "github:huggingface/kernel-builder/torch-2.8"; + kernel-builder.url = "github:huggingface/kernel-builder"; }; - - outputs = - { - self, - kernel-builder, - }: + outputs = { self, kernel-builder, }: kernel-builder.lib.genFlakeOutputs { path = ./.; rev = self.shortRev or self.dirtyShortRev or self.lastModifiedDate; diff --git a/rotary-xpu/rotary_xpu.cpp b/rotary-xpu/rotary_xpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4d30083aeef5360cc129fd4abaacdb902d91dca9 --- /dev/null +++ b/rotary-xpu/rotary_xpu.cpp @@ -0,0 +1,40 @@ +#include +#include "rotary_xpu.hpp" + +void _apply_rotary(torch::Tensor const &x1, torch::Tensor const &x2, + torch::Tensor const &cos, torch::Tensor const &sin, + torch::Tensor &out1, torch::Tensor &out2, + bool const conj) { + auto iter = at::TensorIteratorConfig() + .add_output(out1) + .add_output(out2) + .add_input(x1) + .add_input(x2) + .add_input(cos) + .add_input(sin) + .check_all_same_dtype(false) + .promote_inputs_to_common_dtype(false) + .build(); + + if (!conj) { + AT_DISPATCH_FLOATING_TYPES_AND2(at::kBFloat16, at::kHalf, x1.scalar_type(), "rotary_kernel_xpu", [&] { + gpu_kernel_multiple_outputs( + iter, [] (scalar_t x1, scalar_t x2, scalar_t cos, + scalar_t sin) -> std::tuple { + scalar_t out1 = float(x1) * float(cos) - float(x2) * float(sin); + scalar_t out2 = float(x1) * float(sin) + float(x2) * float(cos); + return {out1, out2}; + }); + }); + } else { + AT_DISPATCH_FLOATING_TYPES_AND2(at::kBFloat16, at::kHalf, x1.scalar_type(), "rotary_kernel_xpu", [&] { + gpu_kernel_multiple_outputs( + iter, [] (scalar_t x1, scalar_t x2, scalar_t cos, + scalar_t sin) -> std::tuple { + scalar_t out1 = float(x1) * float(cos) + float(x2) * float(sin); + scalar_t out2 = -float(x1) * float(sin) + float(x2) * float(cos); + return {out1, out2}; + }); + }); + } +} diff --git a/rotary-xpu/rotary_xpu.hpp b/rotary-xpu/rotary_xpu.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5f0f6e24055695c710c86fbc6208907d160f36e2 --- /dev/null +++ b/rotary-xpu/rotary_xpu.hpp @@ -0,0 +1,375 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +constexpr int MAX_DIMS = 12; + +struct LoadWithoutCast { + template + C10_DEVICE scalar_t load(char* base_ptr, uint32_t offset, int arg) { + return c10::load(reinterpret_cast(base_ptr) + offset); + } +}; + +struct StoreWithoutCast { + template + C10_DEVICE void store(scalar_t value, char* base_ptr, uint32_t offset, int arg = 0) { + *(reinterpret_cast(base_ptr) + offset) = value; + } +}; + +template