danieldk HF Staff committed on
Commit
b4c51e9
·
1 Parent(s): 0041e3f

Build (x86_64-linux)

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  2. build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  3. build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  4. build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  5. build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  6. build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  7. build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  8. build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  9. build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
  10. build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  11. build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  12. build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_ops.py +0 -9
  13. build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so +0 -3
  14. build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__init__.py +0 -21
  15. build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
  16. build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  17. build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  18. build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py +0 -173
  19. build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_ops.py +0 -9
  20. build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so +0 -3
  21. build/torch26-cxx98-cu124-x86_64-linux/paged_attention/platforms.py +0 -92
  22. build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__init__.py +0 -21
  23. build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  24. build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  25. build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_custom_ops.py +0 -173
  26. build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_ops.py +0 -9
  27. build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so +0 -3
  28. build/torch26-cxx98-cu126-x86_64-linux/paged_attention/platforms.py +0 -92
  29. build/{torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc → torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc} +0 -0
  30. build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  31. build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-313.pyc +0 -0
  32. build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  33. build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc +0 -0
  34. build/torch27-cxx11-cu118-x86_64-linux/paged_attention/_ops.py +3 -3
  35. build/{torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so → torch27-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_0041e3f.abi3.so} +2 -2
  36. build/torch27-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so +0 -3
  37. build/{torch26-cxx98-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc → torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc} +0 -0
  38. build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  39. build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-313.pyc +0 -0
  40. build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  41. build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc +0 -0
  42. build/torch27-cxx11-cu126-x86_64-linux/paged_attention/_ops.py +3 -3
  43. build/{torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so → torch27-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_0041e3f.abi3.so} +2 -2
  44. build/torch27-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so +0 -3
  45. build/{torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc → torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc} +0 -0
  46. build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  47. build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-313.pyc +0 -0
  48. build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  49. build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc +0 -0
  50. build/torch27-cxx11-cu128-x86_64-linux/paged_attention/_ops.py +3 -3
build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc DELETED
Binary file (4.7 kB)
 
build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc DELETED
Binary file (547 Bytes)
 
build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc DELETED
Binary file (4.7 kB)
 
build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc DELETED
Binary file (547 Bytes)
 
build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc DELETED
Binary file (4.7 kB)
 
build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc DELETED
Binary file (547 Bytes)
 
build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc DELETED
Binary file (4.71 kB)
 
build/torch26-cxx11-rocm62-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc DELETED
Binary file (548 Bytes)
 
build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (509 Bytes)
 
build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc DELETED
Binary file (4.7 kB)
 
build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc DELETED
Binary file (547 Bytes)
 
build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_ops.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
- from . import _paged_attention_6677800
3
- ops = torch.ops._paged_attention_6677800
4
-
5
- def add_op_namespace_prefix(op_name: str):
6
- """
7
- Prefix op by namespace.
8
- """
9
- return f"_paged_attention_6677800::{op_name}"
 
 
 
 
 
 
 
 
 
 
build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5ad161ff6226eb3f697c3fcec6051c70ca5bc0a66332f927a3cc3ecb39c34dd
3
- size 91821840
 
 
 
 
build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__init__.py DELETED
@@ -1,21 +0,0 @@
1
- from ._custom_ops import (
2
- convert_fp8,
3
- copy_blocks,
4
- paged_attention_v1,
5
- paged_attention_v2,
6
- reshape_and_cache,
7
- reshape_and_cache_flash,
8
- swap_blocks,
9
- )
10
- from ._ops import ops
11
-
12
- __all__ = [
13
- "convert_fp8",
14
- "copy_blocks",
15
- "ops",
16
- "paged_attention_v1",
17
- "paged_attention_v2",
18
- "reshape_and_cache",
19
- "reshape_and_cache_flash",
20
- "swap_blocks",
21
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (509 Bytes)
 
build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc DELETED
Binary file (4.7 kB)
 
build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc DELETED
Binary file (547 Bytes)
 
build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py DELETED
@@ -1,173 +0,0 @@
1
- from typing import List, Optional
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- # page attention ops
9
- def paged_attention_v1(
10
- out: torch.Tensor,
11
- query: torch.Tensor,
12
- key_cache: torch.Tensor,
13
- value_cache: torch.Tensor,
14
- num_kv_heads: int,
15
- scale: float,
16
- block_tables: torch.Tensor,
17
- seq_lens: torch.Tensor,
18
- block_size: int,
19
- max_seq_len: int,
20
- alibi_slopes: Optional[torch.Tensor],
21
- kv_cache_dtype: str,
22
- k_scale: float,
23
- v_scale: float,
24
- tp_rank: int = 0,
25
- blocksparse_local_blocks: int = 0,
26
- blocksparse_vert_stride: int = 0,
27
- blocksparse_block_size: int = 64,
28
- blocksparse_head_sliding_step: int = 0,
29
- ) -> None:
30
- ops.paged_attention_v1(
31
- out,
32
- query,
33
- key_cache,
34
- value_cache,
35
- num_kv_heads,
36
- scale,
37
- block_tables,
38
- seq_lens,
39
- block_size,
40
- max_seq_len,
41
- alibi_slopes,
42
- kv_cache_dtype,
43
- k_scale,
44
- v_scale,
45
- tp_rank,
46
- blocksparse_local_blocks,
47
- blocksparse_vert_stride,
48
- blocksparse_block_size,
49
- blocksparse_head_sliding_step,
50
- )
51
-
52
-
53
- def paged_attention_v2(
54
- out: torch.Tensor,
55
- exp_sum: torch.Tensor,
56
- max_logits: torch.Tensor,
57
- tmp_out: torch.Tensor,
58
- query: torch.Tensor,
59
- key_cache: torch.Tensor,
60
- value_cache: torch.Tensor,
61
- num_kv_heads: int,
62
- scale: float,
63
- block_tables: torch.Tensor,
64
- seq_lens: torch.Tensor,
65
- block_size: int,
66
- max_seq_len: int,
67
- alibi_slopes: Optional[torch.Tensor],
68
- kv_cache_dtype: str,
69
- k_scale: float,
70
- v_scale: float,
71
- tp_rank: int = 0,
72
- blocksparse_local_blocks: int = 0,
73
- blocksparse_vert_stride: int = 0,
74
- blocksparse_block_size: int = 64,
75
- blocksparse_head_sliding_step: int = 0,
76
- ) -> None:
77
- ops.paged_attention_v2(
78
- out,
79
- exp_sum,
80
- max_logits,
81
- tmp_out,
82
- query,
83
- key_cache,
84
- value_cache,
85
- num_kv_heads,
86
- scale,
87
- block_tables,
88
- seq_lens,
89
- block_size,
90
- max_seq_len,
91
- alibi_slopes,
92
- kv_cache_dtype,
93
- k_scale,
94
- v_scale,
95
- tp_rank,
96
- blocksparse_local_blocks,
97
- blocksparse_vert_stride,
98
- blocksparse_block_size,
99
- blocksparse_head_sliding_step,
100
- )
101
-
102
-
103
- def reshape_and_cache(
104
- key: torch.Tensor,
105
- value: torch.Tensor,
106
- key_cache: torch.Tensor,
107
- value_cache: torch.Tensor,
108
- slot_mapping: torch.Tensor,
109
- kv_cache_dtype: str,
110
- k_scale: float,
111
- v_scale: float,
112
- ) -> None:
113
- ops.reshape_and_cache(
114
- key,
115
- value,
116
- key_cache,
117
- value_cache,
118
- slot_mapping,
119
- kv_cache_dtype,
120
- k_scale,
121
- v_scale,
122
- )
123
-
124
-
125
- def reshape_and_cache_flash(
126
- key: torch.Tensor,
127
- value: torch.Tensor,
128
- key_cache: torch.Tensor,
129
- value_cache: torch.Tensor,
130
- slot_mapping: torch.Tensor,
131
- kv_cache_dtype: str,
132
- k_scale: torch.Tensor,
133
- v_scale: torch.Tensor,
134
- ) -> None:
135
- ops.reshape_and_cache_flash(
136
- key,
137
- value,
138
- key_cache,
139
- value_cache,
140
- slot_mapping,
141
- kv_cache_dtype,
142
- k_scale,
143
- v_scale,
144
- )
145
-
146
-
147
- def copy_blocks(
148
- key_caches: List[torch.Tensor],
149
- value_caches: List[torch.Tensor],
150
- block_mapping: torch.Tensor,
151
- ) -> None:
152
- ops.copy_blocks(key_caches, value_caches, block_mapping)
153
-
154
-
155
- def swap_blocks(
156
- src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor
157
- ) -> None:
158
- ops.swap_blocks(src, dst, block_mapping)
159
-
160
-
161
- def convert_fp8(
162
- output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8"
163
- ) -> None:
164
- ops.convert_fp8(output, input, scale, kv_dtype)
165
-
166
-
167
- __all__ = [
168
- "convert_fp8",
169
- "paged_attention_v1",
170
- "paged_attention_v2",
171
- "reshape_and_cache",
172
- "copy_blocks",
173
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_ops.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
- from . import _paged_attention_6677800
3
- ops = torch.ops._paged_attention_6677800
4
-
5
- def add_op_namespace_prefix(op_name: str):
6
- """
7
- Prefix op by namespace.
8
- """
9
- return f"_paged_attention_6677800::{op_name}"
 
 
 
 
 
 
 
 
 
 
build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dc095cafd06c184ecb8e2268bf1a4dbded38098c84880bdd9beb87a856a553f
3
- size 88631224
 
 
 
 
build/torch26-cxx98-cu124-x86_64-linux/paged_attention/platforms.py DELETED
@@ -1,92 +0,0 @@
1
- import os
2
- import random
3
- from abc import ABC, abstractmethod
4
- from functools import lru_cache, wraps
5
- from typing import Callable, ParamSpec, TypeVar
6
-
7
- import numpy as np
8
- import torch
9
-
10
- IS_ROCM = torch.version.hip is not None
11
- IS_MPS = torch.backends.mps.is_available()
12
-
13
-
14
- class Platform(ABC):
15
- @classmethod
16
- def seed_everything(cls, seed: int) -> None:
17
- """
18
- Set the seed of each random module.
19
- `torch.manual_seed` will set seed on all devices.
20
-
21
- Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
22
- """
23
- random.seed(seed)
24
- np.random.seed(seed)
25
- torch.manual_seed(seed)
26
-
27
- @abstractmethod
28
- def get_device_name(self, device_id: int = 0) -> str: ...
29
-
30
- @abstractmethod
31
- def is_cuda(self) -> bool: ...
32
-
33
- @abstractmethod
34
- def is_rocm(self) -> bool: ...
35
-
36
- @abstractmethod
37
- def is_mps(self) -> bool: ...
38
-
39
-
40
- class CudaPlatform(Platform):
41
- @classmethod
42
- @lru_cache(maxsize=8)
43
- def get_device_name(cls, device_id: int = 0) -> str:
44
- return torch.cuda.get_device_name(0)
45
-
46
- def is_cuda(self) -> bool:
47
- return True
48
-
49
- def is_rocm(self) -> bool:
50
- return False
51
-
52
- def is_mps(self) -> bool:
53
- return False
54
-
55
-
56
- class RocmPlatform(Platform):
57
- @classmethod
58
- @lru_cache(maxsize=8)
59
- def get_device_name(cls, device_id: int = 0) -> str:
60
- return torch.cuda.get_device_name(device_id)
61
-
62
- def is_cuda(self) -> bool:
63
- return False
64
-
65
- def is_rocm(self) -> bool:
66
- return True
67
-
68
- def is_mps(self) -> bool:
69
- return False
70
-
71
-
72
- class MpsPlatform(Platform):
73
- @classmethod
74
- @lru_cache(maxsize=8)
75
- def get_device_name(cls, device_id: int = 0) -> str:
76
- return torch.cuda.get_device_name(device_id)
77
-
78
- def is_cuda(self) -> bool:
79
- return False
80
-
81
- def is_rocm(self) -> bool:
82
- return False
83
-
84
- def is_mps(self) -> bool:
85
- return True
86
-
87
- current_platform = (
88
- RocmPlatform() if IS_ROCM else
89
- MpsPlatform() if IS_MPS else
90
- CudaPlatform() if torch.cuda.is_available() else
91
- None
92
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__init__.py DELETED
@@ -1,21 +0,0 @@
1
- from ._custom_ops import (
2
- convert_fp8,
3
- copy_blocks,
4
- paged_attention_v1,
5
- paged_attention_v2,
6
- reshape_and_cache,
7
- reshape_and_cache_flash,
8
- swap_blocks,
9
- )
10
- from ._ops import ops
11
-
12
- __all__ = [
13
- "convert_fp8",
14
- "copy_blocks",
15
- "ops",
16
- "paged_attention_v1",
17
- "paged_attention_v2",
18
- "reshape_and_cache",
19
- "reshape_and_cache_flash",
20
- "swap_blocks",
21
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc DELETED
Binary file (4.7 kB)
 
build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc DELETED
Binary file (547 Bytes)
 
build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_custom_ops.py DELETED
@@ -1,173 +0,0 @@
1
- from typing import List, Optional
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- # page attention ops
9
- def paged_attention_v1(
10
- out: torch.Tensor,
11
- query: torch.Tensor,
12
- key_cache: torch.Tensor,
13
- value_cache: torch.Tensor,
14
- num_kv_heads: int,
15
- scale: float,
16
- block_tables: torch.Tensor,
17
- seq_lens: torch.Tensor,
18
- block_size: int,
19
- max_seq_len: int,
20
- alibi_slopes: Optional[torch.Tensor],
21
- kv_cache_dtype: str,
22
- k_scale: float,
23
- v_scale: float,
24
- tp_rank: int = 0,
25
- blocksparse_local_blocks: int = 0,
26
- blocksparse_vert_stride: int = 0,
27
- blocksparse_block_size: int = 64,
28
- blocksparse_head_sliding_step: int = 0,
29
- ) -> None:
30
- ops.paged_attention_v1(
31
- out,
32
- query,
33
- key_cache,
34
- value_cache,
35
- num_kv_heads,
36
- scale,
37
- block_tables,
38
- seq_lens,
39
- block_size,
40
- max_seq_len,
41
- alibi_slopes,
42
- kv_cache_dtype,
43
- k_scale,
44
- v_scale,
45
- tp_rank,
46
- blocksparse_local_blocks,
47
- blocksparse_vert_stride,
48
- blocksparse_block_size,
49
- blocksparse_head_sliding_step,
50
- )
51
-
52
-
53
- def paged_attention_v2(
54
- out: torch.Tensor,
55
- exp_sum: torch.Tensor,
56
- max_logits: torch.Tensor,
57
- tmp_out: torch.Tensor,
58
- query: torch.Tensor,
59
- key_cache: torch.Tensor,
60
- value_cache: torch.Tensor,
61
- num_kv_heads: int,
62
- scale: float,
63
- block_tables: torch.Tensor,
64
- seq_lens: torch.Tensor,
65
- block_size: int,
66
- max_seq_len: int,
67
- alibi_slopes: Optional[torch.Tensor],
68
- kv_cache_dtype: str,
69
- k_scale: float,
70
- v_scale: float,
71
- tp_rank: int = 0,
72
- blocksparse_local_blocks: int = 0,
73
- blocksparse_vert_stride: int = 0,
74
- blocksparse_block_size: int = 64,
75
- blocksparse_head_sliding_step: int = 0,
76
- ) -> None:
77
- ops.paged_attention_v2(
78
- out,
79
- exp_sum,
80
- max_logits,
81
- tmp_out,
82
- query,
83
- key_cache,
84
- value_cache,
85
- num_kv_heads,
86
- scale,
87
- block_tables,
88
- seq_lens,
89
- block_size,
90
- max_seq_len,
91
- alibi_slopes,
92
- kv_cache_dtype,
93
- k_scale,
94
- v_scale,
95
- tp_rank,
96
- blocksparse_local_blocks,
97
- blocksparse_vert_stride,
98
- blocksparse_block_size,
99
- blocksparse_head_sliding_step,
100
- )
101
-
102
-
103
- def reshape_and_cache(
104
- key: torch.Tensor,
105
- value: torch.Tensor,
106
- key_cache: torch.Tensor,
107
- value_cache: torch.Tensor,
108
- slot_mapping: torch.Tensor,
109
- kv_cache_dtype: str,
110
- k_scale: float,
111
- v_scale: float,
112
- ) -> None:
113
- ops.reshape_and_cache(
114
- key,
115
- value,
116
- key_cache,
117
- value_cache,
118
- slot_mapping,
119
- kv_cache_dtype,
120
- k_scale,
121
- v_scale,
122
- )
123
-
124
-
125
- def reshape_and_cache_flash(
126
- key: torch.Tensor,
127
- value: torch.Tensor,
128
- key_cache: torch.Tensor,
129
- value_cache: torch.Tensor,
130
- slot_mapping: torch.Tensor,
131
- kv_cache_dtype: str,
132
- k_scale: torch.Tensor,
133
- v_scale: torch.Tensor,
134
- ) -> None:
135
- ops.reshape_and_cache_flash(
136
- key,
137
- value,
138
- key_cache,
139
- value_cache,
140
- slot_mapping,
141
- kv_cache_dtype,
142
- k_scale,
143
- v_scale,
144
- )
145
-
146
-
147
- def copy_blocks(
148
- key_caches: List[torch.Tensor],
149
- value_caches: List[torch.Tensor],
150
- block_mapping: torch.Tensor,
151
- ) -> None:
152
- ops.copy_blocks(key_caches, value_caches, block_mapping)
153
-
154
-
155
- def swap_blocks(
156
- src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor
157
- ) -> None:
158
- ops.swap_blocks(src, dst, block_mapping)
159
-
160
-
161
- def convert_fp8(
162
- output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8"
163
- ) -> None:
164
- ops.convert_fp8(output, input, scale, kv_dtype)
165
-
166
-
167
- __all__ = [
168
- "convert_fp8",
169
- "paged_attention_v1",
170
- "paged_attention_v2",
171
- "reshape_and_cache",
172
- "copy_blocks",
173
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_ops.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
- from . import _paged_attention_6677800
3
- ops = torch.ops._paged_attention_6677800
4
-
5
- def add_op_namespace_prefix(op_name: str):
6
- """
7
- Prefix op by namespace.
8
- """
9
- return f"_paged_attention_6677800::{op_name}"
 
 
 
 
 
 
 
 
 
 
build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe20bc582f45af4d0819856e77a7aa31a7d7ef4f821e362e7972a4e8bb6c2eba
3
- size 88390208
 
 
 
 
build/torch26-cxx98-cu126-x86_64-linux/paged_attention/platforms.py DELETED
@@ -1,92 +0,0 @@
1
- import os
2
- import random
3
- from abc import ABC, abstractmethod
4
- from functools import lru_cache, wraps
5
- from typing import Callable, ParamSpec, TypeVar
6
-
7
- import numpy as np
8
- import torch
9
-
10
- IS_ROCM = torch.version.hip is not None
11
- IS_MPS = torch.backends.mps.is_available()
12
-
13
-
14
- class Platform(ABC):
15
- @classmethod
16
- def seed_everything(cls, seed: int) -> None:
17
- """
18
- Set the seed of each random module.
19
- `torch.manual_seed` will set seed on all devices.
20
-
21
- Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
22
- """
23
- random.seed(seed)
24
- np.random.seed(seed)
25
- torch.manual_seed(seed)
26
-
27
- @abstractmethod
28
- def get_device_name(self, device_id: int = 0) -> str: ...
29
-
30
- @abstractmethod
31
- def is_cuda(self) -> bool: ...
32
-
33
- @abstractmethod
34
- def is_rocm(self) -> bool: ...
35
-
36
- @abstractmethod
37
- def is_mps(self) -> bool: ...
38
-
39
-
40
- class CudaPlatform(Platform):
41
- @classmethod
42
- @lru_cache(maxsize=8)
43
- def get_device_name(cls, device_id: int = 0) -> str:
44
- return torch.cuda.get_device_name(0)
45
-
46
- def is_cuda(self) -> bool:
47
- return True
48
-
49
- def is_rocm(self) -> bool:
50
- return False
51
-
52
- def is_mps(self) -> bool:
53
- return False
54
-
55
-
56
- class RocmPlatform(Platform):
57
- @classmethod
58
- @lru_cache(maxsize=8)
59
- def get_device_name(cls, device_id: int = 0) -> str:
60
- return torch.cuda.get_device_name(device_id)
61
-
62
- def is_cuda(self) -> bool:
63
- return False
64
-
65
- def is_rocm(self) -> bool:
66
- return True
67
-
68
- def is_mps(self) -> bool:
69
- return False
70
-
71
-
72
- class MpsPlatform(Platform):
73
- @classmethod
74
- @lru_cache(maxsize=8)
75
- def get_device_name(cls, device_id: int = 0) -> str:
76
- return torch.cuda.get_device_name(device_id)
77
-
78
- def is_cuda(self) -> bool:
79
- return False
80
-
81
- def is_rocm(self) -> bool:
82
- return False
83
-
84
- def is_mps(self) -> bool:
85
- return True
86
-
87
- current_platform = (
88
- RocmPlatform() if IS_ROCM else
89
- MpsPlatform() if IS_MPS else
90
- CudaPlatform() if torch.cuda.is_available() else
91
- None
92
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/{torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc → torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc} RENAMED
Binary files a/build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc and b/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc differ
 
build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc DELETED
Binary file (4.7 kB)
 
build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-313.pyc ADDED
Binary file (4.72 kB). View file
 
build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc DELETED
Binary file (547 Bytes)
 
build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (541 Bytes). View file
 
build/torch27-cxx11-cu118-x86_64-linux/paged_attention/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _paged_attention_6677800
3
- ops = torch.ops._paged_attention_6677800
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_paged_attention_6677800::{op_name}"
 
1
  import torch
2
+ from . import _paged_attention_0041e3f
3
+ ops = torch.ops._paged_attention_0041e3f
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_paged_attention_0041e3f::{op_name}"
build/{torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so → torch27-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_0041e3f.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bc02dd09d997be7c2d3c996d5716ff269b4e4094b6cab70f4ae73c3763c36aa
3
- size 88666456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b899f376425b8d7a213b8d26909d84ffb1213e03d3e5b33675e9408426747501
3
+ size 113844912
build/torch27-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7e9598cbcf88d836c2cbf737dafc4513e373b81f383acfce0aa74227600a166
3
- size 91845296
 
 
 
 
build/{torch26-cxx98-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc → torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc} RENAMED
Binary files a/build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc and b/build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc differ
 
build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc DELETED
Binary file (4.7 kB)
 
build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-313.pyc ADDED
Binary file (4.72 kB). View file
 
build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc DELETED
Binary file (547 Bytes)
 
build/torch27-cxx11-cu126-x86_64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (541 Bytes). View file
 
build/torch27-cxx11-cu126-x86_64-linux/paged_attention/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _paged_attention_6677800
3
- ops = torch.ops._paged_attention_6677800
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_paged_attention_6677800::{op_name}"
 
1
  import torch
2
+ from . import _paged_attention_0041e3f
3
+ ops = torch.ops._paged_attention_0041e3f
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_paged_attention_0041e3f::{op_name}"
build/{torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so → torch27-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_0041e3f.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5077b6b8fffb349c79738c345b02903f643aa9530d10269ea143e8f3125d10e9
3
- size 88425448
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f98a11d55d75ee841515253dfd16b5dd17a811925a445ef765b90b4b56a10e35
3
+ size 110732296
build/torch27-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_6677800.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e14b5caf109855ba826adef443fc4568a170ba2bbd1738b7bbdcec5e43f38cfd
3
- size 88425480
 
 
 
 
build/{torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc → torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc} RENAMED
Binary files a/build/torch27-cxx11-cu118-x86_64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc and b/build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc differ
 
build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc DELETED
Binary file (4.7 kB)
 
build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/_custom_ops.cpython-313.pyc ADDED
Binary file (4.72 kB). View file
 
build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc DELETED
Binary file (547 Bytes)
 
build/torch27-cxx11-cu128-x86_64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (541 Bytes). View file
 
build/torch27-cxx11-cu128-x86_64-linux/paged_attention/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _paged_attention_6677800
3
- ops = torch.ops._paged_attention_6677800
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_paged_attention_6677800::{op_name}"
 
1
  import torch
2
+ from . import _paged_attention_0041e3f
3
+ ops = torch.ops._paged_attention_0041e3f
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_paged_attention_0041e3f::{op_name}"