da0609001d38541f2e1d84b2fab95a3e5cb5413337fc2247150c3f19aae1664e
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/INSTALLER +1 -0
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/LICENSE +29 -0
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/METADATA +168 -0
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/RECORD +104 -0
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/WHEEL +5 -0
- lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/top_level.txt +1 -0
- lib/python3.11/site-packages/fsspec/parquet.py +551 -0
- lib/python3.11/site-packages/fsspec/registry.py +299 -0
- lib/python3.11/site-packages/fsspec/spec.py +1963 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__init__.py +287 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/common.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/copy.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/get.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/put.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/common.py +175 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/copy.py +543 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/get.py +587 -0
- lib/python3.11/site-packages/fsspec/tests/abstract/put.py +577 -0
- lib/python3.11/site-packages/fsspec/transaction.py +85 -0
- lib/python3.11/site-packages/fsspec/utils.py +742 -0
- lib/python3.11/site-packages/functorch/_C.cpython-311-darwin.so +0 -0
- lib/python3.11/site-packages/functorch/__init__.py +38 -0
- lib/python3.11/site-packages/functorch/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/_src/__init__.py +0 -0
- lib/python3.11/site-packages/functorch/_src/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/_src/aot_autograd/__init__.py +8 -0
- lib/python3.11/site-packages/functorch/_src/aot_autograd/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/_src/eager_transforms/__init__.py +7 -0
- lib/python3.11/site-packages/functorch/_src/eager_transforms/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/_src/make_functional/__init__.py +4 -0
- lib/python3.11/site-packages/functorch/_src/make_functional/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/_src/vmap/__init__.py +16 -0
- lib/python3.11/site-packages/functorch/_src/vmap/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/compile/__init__.py +31 -0
- lib/python3.11/site-packages/functorch/compile/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__init__.py +179 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/batch_tensor.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/delayed_mul_tensor.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/dim.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/magic_trace.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/op_properties.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/reference.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/tree_map.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/__pycache__/wrap_type.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/functorch/dim/batch_tensor.py +25 -0
- lib/python3.11/site-packages/functorch/dim/delayed_mul_tensor.py +77 -0
- lib/python3.11/site-packages/functorch/dim/dim.py +110 -0
- lib/python3.11/site-packages/functorch/dim/magic_trace.py +42 -0
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
+pip
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/LICENSE
ADDED
@@ -0,0 +1,29 @@
+BSD 3-Clause License
+
+Copyright (c) 2018, Martin Durant
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/METADATA
ADDED
@@ -0,0 +1,168 @@
+Metadata-Version: 2.1
+Name: fsspec
+Version: 2023.12.2
+Summary: File-system specification
+Home-page: https://github.com/fsspec/filesystem_spec
+Maintainer: Martin Durant
+Maintainer-email: [email protected]
+License: BSD
+Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
+Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
+Keywords: file
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Provides-Extra: abfs
+Requires-Dist: adlfs ; extra == 'abfs'
+Provides-Extra: adl
+Requires-Dist: adlfs ; extra == 'adl'
+Provides-Extra: arrow
+Requires-Dist: pyarrow >=1 ; extra == 'arrow'
+Provides-Extra: dask
+Requires-Dist: dask ; extra == 'dask'
+Requires-Dist: distributed ; extra == 'dask'
+Provides-Extra: devel
+Requires-Dist: pytest ; extra == 'devel'
+Requires-Dist: pytest-cov ; extra == 'devel'
+Provides-Extra: dropbox
+Requires-Dist: dropboxdrivefs ; extra == 'dropbox'
+Requires-Dist: requests ; extra == 'dropbox'
+Requires-Dist: dropbox ; extra == 'dropbox'
+Provides-Extra: entrypoints
+Provides-Extra: full
+Requires-Dist: adlfs ; extra == 'full'
+Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'full'
+Requires-Dist: dask ; extra == 'full'
+Requires-Dist: distributed ; extra == 'full'
+Requires-Dist: dropbox ; extra == 'full'
+Requires-Dist: dropboxdrivefs ; extra == 'full'
+Requires-Dist: fusepy ; extra == 'full'
+Requires-Dist: gcsfs ; extra == 'full'
+Requires-Dist: libarchive-c ; extra == 'full'
+Requires-Dist: ocifs ; extra == 'full'
+Requires-Dist: panel ; extra == 'full'
+Requires-Dist: paramiko ; extra == 'full'
+Requires-Dist: pyarrow >=1 ; extra == 'full'
+Requires-Dist: pygit2 ; extra == 'full'
+Requires-Dist: requests ; extra == 'full'
+Requires-Dist: s3fs ; extra == 'full'
+Requires-Dist: smbprotocol ; extra == 'full'
+Requires-Dist: tqdm ; extra == 'full'
+Provides-Extra: fuse
+Requires-Dist: fusepy ; extra == 'fuse'
+Provides-Extra: gcs
+Requires-Dist: gcsfs ; extra == 'gcs'
+Provides-Extra: git
+Requires-Dist: pygit2 ; extra == 'git'
+Provides-Extra: github
+Requires-Dist: requests ; extra == 'github'
+Provides-Extra: gs
+Requires-Dist: gcsfs ; extra == 'gs'
+Provides-Extra: gui
+Requires-Dist: panel ; extra == 'gui'
+Provides-Extra: hdfs
+Requires-Dist: pyarrow >=1 ; extra == 'hdfs'
+Provides-Extra: http
+Requires-Dist: requests ; extra == 'http'
+Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'http'
+Provides-Extra: libarchive
+Requires-Dist: libarchive-c ; extra == 'libarchive'
+Provides-Extra: oci
+Requires-Dist: ocifs ; extra == 'oci'
+Provides-Extra: s3
+Requires-Dist: s3fs ; extra == 's3'
+Provides-Extra: sftp
+Requires-Dist: paramiko ; extra == 'sftp'
+Provides-Extra: smb
+Requires-Dist: smbprotocol ; extra == 'smb'
+Provides-Extra: ssh
+Requires-Dist: paramiko ; extra == 'ssh'
+Provides-Extra: tqdm
+Requires-Dist: tqdm ; extra == 'tqdm'
+
+# filesystem_spec
+
+[](https://pypi.python.org/pypi/fsspec/)
+[](https://anaconda.org/conda-forge/fsspec)
+
+[](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
+[](https://pepy.tech/project/fsspec)
+
+A specification for pythonic filesystems.
+
+## Install
+
+```bash
+pip install fsspec
+```
+
+would install the base fsspec. Various optionally supported features might require specification of custom
+extra require, e.g. `pip install fsspec[ssh]` will install dependencies for `ssh` backends support.
+Use `pip install fsspec[full]` for installation of all known extra dependencies.
+
+Up-to-date package also provided through conda-forge distribution:
+
+```bash
+conda install -c conda-forge fsspec
+```
+
+
+## Purpose
+
+To produce a template or specification for a file-system interface, that specific implementations should follow,
+so that applications making use of them can rely on a common behaviour and not have to worry about the specific
+internal implementation decisions with any given backend. Many such implementations are included in this package,
+or in sister projects such as `s3fs` and `gcsfs`.
+
+In addition, if this is well-designed, then additional functionality, such as a key-value store or FUSE
+mounting of the file-system implementation may be available for all implementations "for free".
+
+## Documentation
+
+Please refer to [RTD](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
+
+## Develop
+
+fsspec uses GitHub Actions for CI. Environment files can be found
+in the "ci/" directory. Note that the main environment is called "py38",
+but it is expected that the version of python installed be adjustable at
+CI runtime. For local use, pick a version suitable for you.
+
+### Testing
+
+Tests can be run in the dev environment, if activated, via ``pytest fsspec``.
+
+The full fsspec suite requires a system-level docker, docker-compose, and fuse
+installation. If only making changes to one backend implementation, it is
+not generally necessary to run all tests locally.
+
+It is expected that contributors ensure that any change to fsspec does not
+cause issues or regressions for either other fsspec-related packages such
+as gcsfs and s3fs, nor for downstream users of fsspec. The "downstream" CI
+run and corresponding environment file run a set of tests from the dask
+test suite, and very minimal tests against pandas and zarr from the
+test_downstream.py module in this repo.
+
+### Code Formatting
+
+fsspec uses [Black](https://black.readthedocs.io/en/stable) to ensure
+a consistent code format throughout the project.
+Run ``black fsspec`` from the root of the filesystem_spec repository to
+auto-format your code. Additionally, many editors have plugins that will apply
+``black`` as you edit files. ``black`` is included in the ``tox`` environments.
+
+Optionally, you may wish to setup [pre-commit hooks](https://pre-commit.com) to
+automatically run ``black`` when you make a git commit.
+Run ``pre-commit install --install-hooks`` from the root of the
+filesystem_spec repository to setup pre-commit hooks. ``black`` will now be run
+before you commit, reformatting any changed files. You can format without
+committing via ``pre-commit run`` or skip these checks with ``git commit
+--no-verify``.
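As a concrete footnote to the "Purpose" section in the metadata above: the point of the specification is that one API drives every registered backend. A minimal sketch using only the bundled `memory` implementation (no optional extras required); the path `memory://example/hello.txt` is just an illustrative name:

```python
import fsspec

# The same open/filesystem calls work for "file", "s3", "gcs", etc.,
# once the corresponding implementation is installed and registered.
with fsspec.open("memory://example/hello.txt", "w") as f:
    f.write("hello")

fs = fsspec.filesystem("memory")
print(fs.cat("/example/hello.txt"))  # b'hello'
```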
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/RECORD
ADDED
@@ -0,0 +1,104 @@
+fsspec-2023.12.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+fsspec-2023.12.2.dist-info/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
+fsspec-2023.12.2.dist-info/METADATA,sha256=toLeg14fW_MfA33P2NVIPEyWFL7k004pAolypgHrECQ,6829
+fsspec-2023.12.2.dist-info/RECORD,,
+fsspec-2023.12.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+fsspec-2023.12.2.dist-info/top_level.txt,sha256=blt2pDrQDwN3Gklcw13CSPLQRd6aaOgJ8AxqrW395MI,7
+fsspec/__init__.py,sha256=2kT62GfFK-AjgS-LgwSsCo_VA2IePvsyv8Ash5oiaFA,1982
+fsspec/__pycache__/__init__.cpython-311.pyc,,
+fsspec/__pycache__/_version.cpython-311.pyc,,
+fsspec/__pycache__/archive.cpython-311.pyc,,
+fsspec/__pycache__/asyn.cpython-311.pyc,,
+fsspec/__pycache__/caching.cpython-311.pyc,,
+fsspec/__pycache__/callbacks.cpython-311.pyc,,
+fsspec/__pycache__/compression.cpython-311.pyc,,
+fsspec/__pycache__/config.cpython-311.pyc,,
+fsspec/__pycache__/conftest.cpython-311.pyc,,
+fsspec/__pycache__/core.cpython-311.pyc,,
+fsspec/__pycache__/dircache.cpython-311.pyc,,
+fsspec/__pycache__/exceptions.cpython-311.pyc,,
+fsspec/__pycache__/fuse.cpython-311.pyc,,
+fsspec/__pycache__/generic.cpython-311.pyc,,
+fsspec/__pycache__/gui.cpython-311.pyc,,
+fsspec/__pycache__/mapping.cpython-311.pyc,,
+fsspec/__pycache__/parquet.cpython-311.pyc,,
+fsspec/__pycache__/registry.cpython-311.pyc,,
+fsspec/__pycache__/spec.cpython-311.pyc,,
+fsspec/__pycache__/transaction.cpython-311.pyc,,
+fsspec/__pycache__/utils.cpython-311.pyc,,
+fsspec/_version.py,sha256=Kf9CIUDExVlqHjn9lLOn0QJcfeRWAe0PFvFHkRzI9iA,501
+fsspec/archive.py,sha256=S__DzfZj-urAN3tp2W6jJ6YDiXG1fAl7FjvWUN73qIE,2386
+fsspec/asyn.py,sha256=wx6vr5eBJYdW7a2cyv-LkfWu5dCDCcAjcDKjp3ylgR0,36154
+fsspec/caching.py,sha256=N45pzJdD4w5FOX_sxGvHWirggPNB66JTGP1HH6fpSck,28781
+fsspec/callbacks.py,sha256=qmD1v-WWxWmTmcUkEadq-_F_n3OGp9JYarjupUq_j3o,6358
+fsspec/compression.py,sha256=Zrbbb_m2SCF427BMJRYbDKMuSZIIV2YqteoS7AdR8Sc,4867
+fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
+fsspec/conftest.py,sha256=fVfx-NLrH_OZS1TIpYNoPzM7efEcMoL62reHOdYeFCA,1245
+fsspec/core.py,sha256=0yCj1Z5MhbSDIQiqFs49VORl9QaGwV6hp9bXdkIoPIo,22363
+fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
+fsspec/exceptions.py,sha256=s5eA2wIwzj-aeV0i_KDXsBaIhJJRKzmMGUGwuBHTnS4,348
+fsspec/fuse.py,sha256=66amOa6wdIbS0DMhhfAPUoOB37HPorfXD1izV0prmTY,10145
+fsspec/generic.py,sha256=2EcEegwdTLyQ2qSgz3Y6cbAuiWz7bybsEWai_XYkGtw,13457
+fsspec/gui.py,sha256=BEVFplRsQyakNeCWU-vyZBD-16x_flEe0XiDxXparEU,13913
+fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+fsspec/implementations/__pycache__/__init__.cpython-311.pyc,,
+fsspec/implementations/__pycache__/arrow.cpython-311.pyc,,
+fsspec/implementations/__pycache__/cache_mapper.cpython-311.pyc,,
+fsspec/implementations/__pycache__/cache_metadata.cpython-311.pyc,,
+fsspec/implementations/__pycache__/cached.cpython-311.pyc,,
+fsspec/implementations/__pycache__/dask.cpython-311.pyc,,
+fsspec/implementations/__pycache__/data.cpython-311.pyc,,
+fsspec/implementations/__pycache__/dbfs.cpython-311.pyc,,
+fsspec/implementations/__pycache__/dirfs.cpython-311.pyc,,
+fsspec/implementations/__pycache__/ftp.cpython-311.pyc,,
+fsspec/implementations/__pycache__/git.cpython-311.pyc,,
+fsspec/implementations/__pycache__/github.cpython-311.pyc,,
+fsspec/implementations/__pycache__/http.cpython-311.pyc,,
+fsspec/implementations/__pycache__/jupyter.cpython-311.pyc,,
+fsspec/implementations/__pycache__/libarchive.cpython-311.pyc,,
+fsspec/implementations/__pycache__/local.cpython-311.pyc,,
+fsspec/implementations/__pycache__/memory.cpython-311.pyc,,
+fsspec/implementations/__pycache__/reference.cpython-311.pyc,,
+fsspec/implementations/__pycache__/sftp.cpython-311.pyc,,
+fsspec/implementations/__pycache__/smb.cpython-311.pyc,,
+fsspec/implementations/__pycache__/tar.cpython-311.pyc,,
+fsspec/implementations/__pycache__/webhdfs.cpython-311.pyc,,
+fsspec/implementations/__pycache__/zip.cpython-311.pyc,,
+fsspec/implementations/arrow.py,sha256=1d-c5KceQJxm8QXML8fFXHvQx0wstG-tNJNsrgMX_CI,8240
+fsspec/implementations/cache_mapper.py,sha256=nE_sY3vw-jJbeBcAP6NGtacP3jHW_7EcG3yUSf0A-4Y,2502
+fsspec/implementations/cache_metadata.py,sha256=ZvyA7Y3KK-5Ct4E5pELzD6mH_5T03XqaKVT96qYDADU,8576
+fsspec/implementations/cached.py,sha256=jCQSAIiO7M8OOmwG4cCYn4LGvMVCbldC9j7GeonwoEc,30238
+fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
+fsspec/implementations/data.py,sha256=Oti0dKzyeadnVIedo3s8CADoh9bNM-96_6viTEYr4lo,1245
+fsspec/implementations/dbfs.py,sha256=0ndCE2OQqrWv6Y8ETufxOQ9ymIIO2JA_Q82bnilqTaw,14660
+fsspec/implementations/dirfs.py,sha256=8EEgKin5JgFBqzHaKig7ipiFAZJvbChUX_vpC_jagoY,11136
+fsspec/implementations/ftp.py,sha256=FzcHeieyda-ai_D8w4YKCzvI4gshuFYlBACBuEIx2Nk,11419
+fsspec/implementations/git.py,sha256=vKGI-Vd5q4H2RrvhebkPc9NwlfkZ980OUGhebeCw-M0,4034
+fsspec/implementations/github.py,sha256=hCisC1vXzZ9kP1UnyGz2Ba8c9cS2JmSGFHtgHG_2Gqw,7190
+fsspec/implementations/http.py,sha256=cK7HQdVgR8PVLWkB0q0xsXohOP16X-zQiT2uqB1Kq4E,29265
+fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
+fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
+fsspec/implementations/local.py,sha256=GV5OltZrz9aOM8KKSx3T7QE7-U9KX3BOz3Eql3jw_xY,13371
+fsspec/implementations/memory.py,sha256=-a-NR66T-sGj9xTInUsu8KsEiqd156bF8Ui9BuXfmEA,9698
+fsspec/implementations/reference.py,sha256=BHhvx8LIYyBk5OVBWw-PmZsAs_OCaLvF1p8656bwVJE,42438
+fsspec/implementations/sftp.py,sha256=TNmXVac9c5H9Gmiee2EjZNKXnXdkwwaNL2cHDkp_gG4,5632
+fsspec/implementations/smb.py,sha256=k3RtzW97lJtYuw_QpP1rJRFnUBmSsw9twFjUCex0a5U,10591
+fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
+fsspec/implementations/webhdfs.py,sha256=C5T96C_p66pUf2cQda-7HIZ9fKYwfCkupf2LN_7n7Dw,16145
+fsspec/implementations/zip.py,sha256=JDX-3HOI15qUl6VTBsNPuDp5RVN6s2n3Bywd4mMu0T0,4347
+fsspec/mapping.py,sha256=WFEXRWxujQwfzzkRP5tpdIE0265okAtlP97qFZGvV1k,8165
+fsspec/parquet.py,sha256=i4H3EU3K1Q6jp8sqjFji6a6gKnlOEZufaa7DRNE5X-4,19516
+fsspec/registry.py,sha256=-dl7sh2tsfhMA2uxz5KQDsPFehQTgMJIbVjNq6QLoKU,11145
+fsspec/spec.py,sha256=kfZpvKoh-fftKG6cOkOi2k0PJJwRqV4ZX_NElCBdcB8,66154
+fsspec/tests/abstract/__init__.py,sha256=i1wcFixV6QhOwdoB24c8oXjzobISNqiKVz9kl2DvAY8,10028
+fsspec/tests/abstract/__pycache__/__init__.cpython-311.pyc,,
+fsspec/tests/abstract/__pycache__/common.cpython-311.pyc,,
+fsspec/tests/abstract/__pycache__/copy.cpython-311.pyc,,
+fsspec/tests/abstract/__pycache__/get.cpython-311.pyc,,
+fsspec/tests/abstract/__pycache__/put.cpython-311.pyc,,
+fsspec/tests/abstract/common.py,sha256=1GQwNo5AONzAnzZj0fWgn8NJPLXALehbsuGxS3FzWVU,4973
+fsspec/tests/abstract/copy.py,sha256=nyCp1Q9apHzti2_UPDh3HzVhRmV7dciD-3dq-wM7JuU,19643
+fsspec/tests/abstract/get.py,sha256=vNR4HztvTR7Cj56AMo7_tx7TeYz1Jgr_2Wb8Lv-UiBY,20755
+fsspec/tests/abstract/put.py,sha256=hEf-yuMWBOT7B6eWcck3tMyJWzdVXtxkY-O6LUt1KAE,20877
+fsspec/transaction.py,sha256=jeexB-H6Aw_gN6Z7hoKKe6v8zizITq39-gyTgpipIKE,2251
+fsspec/utils.py,sha256=_VX_0VwDtoAFSjMYrxvJvnPNX9FMoHO5BlFHXJ0bHFI,23053
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/WHEEL
ADDED
@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.42.0)
+Root-Is-Purelib: true
+Tag: py3-none-any
+
lib/python3.11/site-packages/fsspec-2023.12.2.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+fsspec
lib/python3.11/site-packages/fsspec/parquet.py
ADDED
@@ -0,0 +1,551 @@
+import io
+import json
+import warnings
+
+from .core import url_to_fs
+from .utils import merge_offset_ranges
+
+# Parquet-Specific Utilities for fsspec
+#
+# Most of the functions defined in this module are NOT
+# intended for public consumption. The only exception
+# to this is `open_parquet_file`, which should be used
+# in place of `fs.open()` to open parquet-formatted files
+# on remote file systems.
+
+
+def open_parquet_file(
+    path,
+    mode="rb",
+    fs=None,
+    metadata=None,
+    columns=None,
+    row_groups=None,
+    storage_options=None,
+    strict=False,
+    engine="auto",
+    max_gap=64_000,
+    max_block=256_000_000,
+    footer_sample_size=1_000_000,
+    **kwargs,
+):
+    """
+    Return a file-like object for a single Parquet file.
+
+    The specified parquet `engine` will be used to parse the
+    footer metadata, and determine the required byte ranges
+    from the file. The target path will then be opened with
+    the "parts" (`KnownPartsOfAFile`) caching strategy.
+
+    Note that this method is intended for usage with remote
+    file systems, and is unlikely to improve parquet-read
+    performance on local file systems.
+
+    Parameters
+    ----------
+    path: str
+        Target file path.
+    mode: str, optional
+        Mode option to be passed through to `fs.open`. Default is "rb".
+    metadata: Any, optional
+        Parquet metadata object. Object type must be supported
+        by the backend parquet engine. For now, only the "fastparquet"
+        engine supports an explicit `ParquetFile` metadata object.
+        If a metadata object is supplied, the remote footer metadata
+        will not need to be transferred into local memory.
+    fs: AbstractFileSystem, optional
+        Filesystem object to use for opening the file. If nothing is
+        specified, an `AbstractFileSystem` object will be inferred.
+    engine : str, default "auto"
+        Parquet engine to use for metadata parsing. Allowed options
+        include "fastparquet", "pyarrow", and "auto". The specified
+        engine must be installed in the current environment. If
+        "auto" is specified, and both engines are installed,
+        "fastparquet" will take precedence over "pyarrow".
+    columns: list, optional
+        List of all column names that may be read from the file.
+    row_groups : list, optional
+        List of all row-groups that may be read from the file. This
+        may be a list of row-group indices (integers), or it may be
+        a list of `RowGroup` metadata objects (if the "fastparquet"
+        engine is used).
+    storage_options : dict, optional
+        Used to generate an `AbstractFileSystem` object if `fs` was
+        not specified.
+    strict : bool, optional
+        Whether the resulting `KnownPartsOfAFile` cache should
+        fetch reads that go beyond a known byte-range boundary.
+        If `False` (the default), any read that ends outside a
+        known part will be zero padded. Note that using
+        `strict=True` may be useful for debugging.
+    max_gap : int, optional
+        Neighboring byte ranges will only be merged when their
+        inter-range gap is <= `max_gap`. Default is 64KB.
+    max_block : int, optional
+        Neighboring byte ranges will only be merged when the size of
+        the aggregated range is <= `max_block`. Default is 256MB.
+    footer_sample_size : int, optional
+        Number of bytes to read from the end of the path to look
+        for the footer metadata. If the sampled bytes do not contain
+        the footer, a second read request will be required, and
+        performance will suffer. Default is 1MB.
+    **kwargs :
+        Optional key-word arguments to pass to `fs.open`
+    """
+
+    # Make sure we have an `AbstractFileSystem` object
+    # to work with
+    if fs is None:
+        fs = url_to_fs(path, **(storage_options or {}))[0]
+
+    # For now, `columns == []` not supported. Just use
+    # default `open` command with `path` input
+    if columns is not None and len(columns) == 0:
+        return fs.open(path, mode=mode)
+
+    # Set the engine
+    engine = _set_engine(engine)
+
+    # Fetch the known byte ranges needed to read
+    # `columns` and/or `row_groups`
+    data = _get_parquet_byte_ranges(
+        [path],
+        fs,
+        metadata=metadata,
+        columns=columns,
+        row_groups=row_groups,
+        engine=engine,
+        max_gap=max_gap,
+        max_block=max_block,
+        footer_sample_size=footer_sample_size,
+    )
+
+    # Extract file name from `data`
+    fn = next(iter(data)) if data else path
+
+    # Call self.open with "parts" caching
+    options = kwargs.pop("cache_options", {}).copy()
+    return fs.open(
+        fn,
+        mode=mode,
+        cache_type="parts",
+        cache_options={
+            **options,
+            **{
+                "data": data.get(fn, {}),
+                "strict": strict,
+            },
+        },
+        **kwargs,
+    )
+
+
+def _get_parquet_byte_ranges(
+    paths,
+    fs,
+    metadata=None,
+    columns=None,
+    row_groups=None,
+    max_gap=64_000,
+    max_block=256_000_000,
+    footer_sample_size=1_000_000,
+    engine="auto",
+):
+    """Get a dictionary of the known byte ranges needed
+    to read a specific column/row-group selection from a
+    Parquet dataset. Each value in the output dictionary
+    is intended for use as the `data` argument for the
+    `KnownPartsOfAFile` caching strategy of a single path.
+    """
+
+    # Set engine if necessary
+    if isinstance(engine, str):
+        engine = _set_engine(engine)
+
+    # Pass to specialized function if metadata is defined
+    if metadata is not None:
+
+        # Use the provided parquet metadata object
+        # to avoid transferring/parsing footer metadata
+        return _get_parquet_byte_ranges_from_metadata(
+            metadata,
+            fs,
+            engine,
+            columns=columns,
+            row_groups=row_groups,
+            max_gap=max_gap,
+            max_block=max_block,
+        )
+
+    # Get file sizes asynchronously
+    file_sizes = fs.sizes(paths)
+
+    # Populate global paths, starts, & ends
+    result = {}
+    data_paths = []
+    data_starts = []
+    data_ends = []
+    add_header_magic = True
+    if columns is None and row_groups is None:
+        # We are NOT selecting specific columns or row-groups.
+        #
+        # We can avoid sampling the footers, and just transfer
+        # all file data with cat_ranges
+        for i, path in enumerate(paths):
+            result[path] = {}
+            for b in range(0, file_sizes[i], max_block):
+                data_paths.append(path)
+                data_starts.append(b)
+                data_ends.append(min(b + max_block, file_sizes[i]))
+        add_header_magic = False  # "Magic" should already be included
+    else:
+        # We ARE selecting specific columns or row-groups.
+        #
+        # Gather file footers.
+        # We just take the last `footer_sample_size` bytes of each
+        # file (or the entire file if it is smaller than that)
+        footer_starts = []
+        footer_ends = []
+        for i, path in enumerate(paths):
+            footer_ends.append(file_sizes[i])
+            sample_size = max(0, file_sizes[i] - footer_sample_size)
+            footer_starts.append(sample_size)
+        footer_samples = fs.cat_ranges(paths, footer_starts, footer_ends)
+
+        # Check our footer samples and re-sample if necessary.
+        missing_footer_starts = footer_starts.copy()
+        large_footer = 0
+        for i, path in enumerate(paths):
+            footer_size = int.from_bytes(footer_samples[i][-8:-4], "little")
+            real_footer_start = file_sizes[i] - (footer_size + 8)
+            if real_footer_start < footer_starts[i]:
+                missing_footer_starts[i] = real_footer_start
+                large_footer = max(large_footer, (footer_size + 8))
+        if large_footer:
+            warnings.warn(
+                f"Not enough data was used to sample the parquet footer. "
+                f"Try setting footer_sample_size >= {large_footer}."
+            )
+            for i, block in enumerate(
+                fs.cat_ranges(
+                    paths,
+                    missing_footer_starts,
+                    footer_starts,
+                )
+            ):
+                footer_samples[i] = block + footer_samples[i]
+                footer_starts[i] = missing_footer_starts[i]
+
+        # Calculate required byte ranges for each path
+        for i, path in enumerate(paths):
+
+            # Deal with small-file case.
+            # Just include all remaining bytes of the file
+            # in a single range.
+            if file_sizes[i] < max_block:
+                if footer_starts[i] > 0:
+                    # Only need to transfer the data if the
+                    # footer sample isn't already the whole file
+                    data_paths.append(path)
+                    data_starts.append(0)
+                    data_ends.append(footer_starts[i])
+                continue
+
+            # Use "engine" to collect data byte ranges
+            path_data_starts, path_data_ends = engine._parquet_byte_ranges(
+                columns,
+                row_groups=row_groups,
+                footer=footer_samples[i],
+                footer_start=footer_starts[i],
+            )
+
+            data_paths += [path] * len(path_data_starts)
+            data_starts += path_data_starts
+            data_ends += path_data_ends
+
+        # Merge adjacent offset ranges
+        data_paths, data_starts, data_ends = merge_offset_ranges(
+            data_paths,
+            data_starts,
+            data_ends,
+            max_gap=max_gap,
+            max_block=max_block,
+            sort=False,  # Should already be sorted
+        )
+
+        # Start by populating `result` with footer samples
+        for i, path in enumerate(paths):
+            result[path] = {(footer_starts[i], footer_ends[i]): footer_samples[i]}
+
+    # Transfer the data byte-ranges into local memory
+    _transfer_ranges(fs, result, data_paths, data_starts, data_ends)
+
+    # Add b"PAR1" to header if necessary
+    if add_header_magic:
+        _add_header_magic(result)
+
+    return result
+
+
+def _get_parquet_byte_ranges_from_metadata(
+    metadata,
+    fs,
+    engine,
+    columns=None,
+    row_groups=None,
+    max_gap=64_000,
+    max_block=256_000_000,
+):
+    """Simplified version of `_get_parquet_byte_ranges` for
+    the case that an engine-specific `metadata` object is
+    provided, and the remote footer metadata does not need to
+    be transferred before calculating the required byte ranges.
+    """
+
+    # Use "engine" to collect data byte ranges
+    data_paths, data_starts, data_ends = engine._parquet_byte_ranges(
+        columns,
+        row_groups=row_groups,
+        metadata=metadata,
+    )
+
+    # Merge adjacent offset ranges
+    data_paths, data_starts, data_ends = merge_offset_ranges(
+        data_paths,
+        data_starts,
+        data_ends,
+        max_gap=max_gap,
+        max_block=max_block,
+        sort=False,  # Should be sorted
+    )
+
+    # Transfer the data byte-ranges into local memory
+    result = {fn: {} for fn in list(set(data_paths))}
+    _transfer_ranges(fs, result, data_paths, data_starts, data_ends)
+
+    # Add b"PAR1" to header
+    _add_header_magic(result)
+
+    return result
+
+
+def _transfer_ranges(fs, blocks, paths, starts, ends):
+    # Use cat_ranges to gather the data byte_ranges
+    ranges = (paths, starts, ends)
+    for path, start, stop, data in zip(*ranges, fs.cat_ranges(*ranges)):
+        blocks[path][(start, stop)] = data
+
+
+def _add_header_magic(data):
+    # Add b"PAR1" to file headers
+    for i, path in enumerate(list(data.keys())):
+        add_magic = True
+        for k in data[path].keys():
+            if k[0] == 0 and k[1] >= 4:
+                add_magic = False
+                break
+        if add_magic:
+            data[path][(0, 4)] = b"PAR1"
+
+
+def _set_engine(engine_str):
+
+    # Define a list of parquet engines to try
+    if engine_str == "auto":
+        try_engines = ("fastparquet", "pyarrow")
+    elif not isinstance(engine_str, str):
+        raise ValueError(
+            "Failed to set parquet engine! "
+            "Please pass 'fastparquet', 'pyarrow', or 'auto'"
+        )
+    elif engine_str not in ("fastparquet", "pyarrow"):
+        raise ValueError(f"{engine_str} engine not supported by `fsspec.parquet`")
+    else:
+        try_engines = [engine_str]
+
+    # Try importing the engines in `try_engines`,
+    # and choose the first one that succeeds
+    for engine in try_engines:
+        try:
+            if engine == "fastparquet":
+                return FastparquetEngine()
+            elif engine == "pyarrow":
+                return PyarrowEngine()
+        except ImportError:
+            pass
+
+    # Raise an error if a supported parquet engine
+    # was not found
+    raise ImportError(
+        f"The following parquet engines are not installed "
+        f"in your python environment: {try_engines}. "
+        f"Please install 'fastparquet' or 'pyarrow' to "
+        f"utilize the `fsspec.parquet` module."
+    )
+
+
+class FastparquetEngine:
+
+    # The purpose of the FastparquetEngine class is
+    # to check if fastparquet can be imported (on initialization)
+    # and to define a `_parquet_byte_ranges` method. In the
+    # future, this class may also be used to define other
+    # methods/logic that are specific to fastparquet.
+
+    def __init__(self):
+        import fastparquet as fp
+
+        self.fp = fp
+
+    def _row_group_filename(self, row_group, pf):
+        return pf.row_group_filename(row_group)
+
+    def _parquet_byte_ranges(
+        self,
+        columns,
+        row_groups=None,
+        metadata=None,
+        footer=None,
+        footer_start=None,
+    ):
+
+        # Initialize offset ranges and define ParquetFile metadata
+        pf = metadata
+        data_paths, data_starts, data_ends = [], [], []
+        if pf is None:
+            pf = self.fp.ParquetFile(io.BytesIO(footer))
+
+        # Convert columns to a set and add any index columns
+        # specified in the pandas metadata (just in case)
+        column_set = None if columns is None else set(columns)
+        if column_set is not None and hasattr(pf, "pandas_metadata"):
+            md_index = [
+                ind
+                for ind in pf.pandas_metadata.get("index_columns", [])
+                # Ignore RangeIndex information
+                if not isinstance(ind, dict)
+            ]
+            column_set |= set(md_index)
+
+        # Check if row_groups is a list of integers
+        # or a list of row-group metadata
+        if row_groups and not isinstance(row_groups[0], int):
+            # Input row_groups contains row-group metadata
+            row_group_indices = None
+        else:
+            # Input row_groups contains row-group indices
+            row_group_indices = row_groups
+            row_groups = pf.row_groups
+
+        # Loop through column chunks to add required byte ranges
+        for r, row_group in enumerate(row_groups):
+            # Skip this row-group if we are targeting
+            # specific row-groups
+            if row_group_indices is None or r in row_group_indices:
+
+                # Find the target parquet-file path for `row_group`
+                fn = self._row_group_filename(row_group, pf)
+
+                for column in row_group.columns:
+                    name = column.meta_data.path_in_schema[0]
+                    # Skip this column if we are targeting
+                    # specific columns
+                    if column_set is None or name in column_set:
+                        file_offset0 = column.meta_data.dictionary_page_offset
+                        if file_offset0 is None:
+                            file_offset0 = column.meta_data.data_page_offset
+                        num_bytes = column.meta_data.total_compressed_size
+                        if footer_start is None or file_offset0 < footer_start:
+                            data_paths.append(fn)
+                            data_starts.append(file_offset0)
+                            data_ends.append(
+                                min(
+                                    file_offset0 + num_bytes,
+                                    footer_start or (file_offset0 + num_bytes),
+                                )
+                            )
+
+        if metadata:
+            # The metadata in this call may map to multiple
+            # file paths. Need to include `data_paths`
+            return data_paths, data_starts, data_ends
+        return data_starts, data_ends
+
+
+class PyarrowEngine:
+
+    # The purpose of the PyarrowEngine class is
+    # to check if pyarrow can be imported (on initialization)
+    # and to define a `_parquet_byte_ranges` method. In the
+    # future, this class may also be used to define other
+    # methods/logic that are specific to pyarrow.
+
+    def __init__(self):
+        import pyarrow.parquet as pq
+
+        self.pq = pq
+
+    def _row_group_filename(self, row_group, metadata):
+        raise NotImplementedError
+
+    def _parquet_byte_ranges(
+        self,
+        columns,
+        row_groups=None,
+        metadata=None,
+        footer=None,
+        footer_start=None,
+    ):
+
+        if metadata is not None:
+            raise ValueError("metadata input not supported for PyarrowEngine")
+
+        data_starts, data_ends = [], []
+        md = self.pq.ParquetFile(io.BytesIO(footer)).metadata
+
+        # Convert columns to a set and add any index columns
+        # specified in the pandas metadata (just in case)
+        column_set = None if columns is None else set(columns)
+        if column_set is not None:
+            schema = md.schema.to_arrow_schema()
+            has_pandas_metadata = (
+                schema.metadata is not None and b"pandas" in schema.metadata
+            )
+            if has_pandas_metadata:
+                md_index = [
+                    ind
+                    for ind in json.loads(
+                        schema.metadata[b"pandas"].decode("utf8")
+                    ).get("index_columns", [])
+                    # Ignore RangeIndex information
+                    if not isinstance(ind, dict)
+                ]
+                column_set |= set(md_index)
+
+        # Loop through column chunks to add required byte ranges
+        for r in range(md.num_row_groups):
+            # Skip this row-group if we are targeting
+            # specific row-groups
+            if row_groups is None or r in row_groups:
+                row_group = md.row_group(r)
+                for c in range(row_group.num_columns):
+                    column = row_group.column(c)
+                    name = column.path_in_schema
+                    # Skip this column if we are targeting
+                    # specific columns
+                    split_name = name.split(".")[0]
+                    if (
+                        column_set is None
+                        or name in column_set
+                        or split_name in column_set
+                    ):
+                        file_offset0 = column.dictionary_page_offset
+                        if file_offset0 is None:
+                            file_offset0 = column.data_page_offset
+                        num_bytes = column.total_compressed_size
+                        if file_offset0 < footer_start:
+                            data_starts.append(file_offset0)
+                            data_ends.append(
+                                min(file_offset0 + num_bytes, footer_start)
+                            )
+        return data_starts, data_ends
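For orientation on the module added above: `open_parquet_file` is its one public entry point, intended as a drop-in for `fs.open()` on remote stores; it fetches only the footer plus the byte ranges covering the requested columns and row-groups via the "parts" (`KnownPartsOfAFile`) cache. A minimal usage sketch, assuming `pandas`, `s3fs`, and at least one of the two engines are installed; the bucket/object path is hypothetical:

```python
import pandas as pd

from fsspec.parquet import open_parquet_file

# Only the footer sample and the column/row-group byte ranges are
# transferred from the remote object, not the whole file.
with open_parquet_file(
    "s3://my-bucket/data.parquet",  # hypothetical path
    columns=["x", "y"],
    row_groups=[0],
    engine="auto",
) as f:
    df = pd.read_parquet(f, columns=["x", "y"])
```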
lib/python3.11/site-packages/fsspec/registry.py
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import importlib
|
| 4 |
+
import types
|
| 5 |
+
import warnings
|
| 6 |
+
|
| 7 |
+
__all__ = ["registry", "get_filesystem_class", "default"]
|
| 8 |
+
|
| 9 |
+
# internal, mutable
|
| 10 |
+
_registry: dict[str, type] = {}
|
| 11 |
+
|
| 12 |
+
# external, immutable
|
| 13 |
+
registry = types.MappingProxyType(_registry)
|
| 14 |
+
default = "file"
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def register_implementation(name, cls, clobber=False, errtxt=None):
|
| 18 |
+
"""Add implementation class to the registry
|
| 19 |
+
|
| 20 |
+
Parameters
|
| 21 |
+
----------
|
| 22 |
+
name: str
|
| 23 |
+
Protocol name to associate with the class
|
| 24 |
+
cls: class or str
|
| 25 |
+
if a class: fsspec-compliant implementation class (normally inherits from
|
| 26 |
+
``fsspec.AbstractFileSystem``, gets added straight to the registry. If a
|
| 27 |
+
str, the full path to an implementation class like package.module.class,
|
| 28 |
+
which gets added to known_implementations,
|
| 29 |
+
so the import is deferred until the filesystem is actually used.
|
| 30 |
+
clobber: bool (optional)
|
| 31 |
+
Whether to overwrite a protocol with the same name; if False, will raise
|
| 32 |
+
instead.
|
| 33 |
+
errtxt: str (optional)
|
| 34 |
+
If given, then a failure to import the given class will result in this
|
| 35 |
+
text being given.
|
| 36 |
+
"""
|
| 37 |
+
if isinstance(cls, str):
|
| 38 |
+
if name in known_implementations and clobber is False:
|
| 39 |
+
if cls != known_implementations[name]["class"]:
                raise ValueError(
                    f"Name ({name}) already in the known_implementations and clobber "
                    f"is False"
                )
        else:
            known_implementations[name] = {
                "class": cls,
                "err": errtxt or f"{cls} import failed for protocol {name}",
            }

    else:
        if name in registry and clobber is False:
            if _registry[name] is not cls:
                raise ValueError(
                    f"Name ({name}) already in the registry and clobber is False"
                )
        else:
            _registry[name] = cls


# protocols mapped to the class which implements them. This dict can be
# updated with register_implementation
known_implementations = {
    "data": {"class": "fsspec.implementations.data.DataFileSystem"},
    "file": {"class": "fsspec.implementations.local.LocalFileSystem"},
    "local": {"class": "fsspec.implementations.local.LocalFileSystem"},
    "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"},
    "dropbox": {
        "class": "dropboxdrivefs.DropboxDriveFileSystem",
        "err": (
            'DropboxFileSystem requires "dropboxdrivefs",'
            '"requests" and "dropbox" to be installed'
        ),
    },
    "http": {
        "class": "fsspec.implementations.http.HTTPFileSystem",
        "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
    },
    "https": {
        "class": "fsspec.implementations.http.HTTPFileSystem",
        "err": 'HTTPFileSystem requires "requests" and "aiohttp" to be installed',
    },
    "zip": {"class": "fsspec.implementations.zip.ZipFileSystem"},
    "tar": {"class": "fsspec.implementations.tar.TarFileSystem"},
    "gcs": {
        "class": "gcsfs.GCSFileSystem",
        "err": "Please install gcsfs to access Google Storage",
    },
    "gs": {
        "class": "gcsfs.GCSFileSystem",
        "err": "Please install gcsfs to access Google Storage",
    },
    "gdrive": {
        "class": "gdrivefs.GoogleDriveFileSystem",
        "err": "Please install gdrivefs for access to Google Drive",
    },
    "sftp": {
        "class": "fsspec.implementations.sftp.SFTPFileSystem",
        "err": 'SFTPFileSystem requires "paramiko" to be installed',
    },
    "ssh": {
        "class": "fsspec.implementations.sftp.SFTPFileSystem",
        "err": 'SFTPFileSystem requires "paramiko" to be installed',
    },
    "ftp": {"class": "fsspec.implementations.ftp.FTPFileSystem"},
    "hdfs": {
        "class": "fsspec.implementations.arrow.HadoopFileSystem",
        "err": "pyarrow and local java libraries required for HDFS",
    },
    "arrow_hdfs": {
        "class": "fsspec.implementations.arrow.HadoopFileSystem",
        "err": "pyarrow and local java libraries required for HDFS",
    },
    "webhdfs": {
        "class": "fsspec.implementations.webhdfs.WebHDFS",
        "err": 'webHDFS access requires "requests" to be installed',
    },
    "s3": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
    "s3a": {"class": "s3fs.S3FileSystem", "err": "Install s3fs to access S3"},
    "wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"},
    "oci": {
        "class": "ocifs.OCIFileSystem",
        "err": "Install ocifs to access OCI Object Storage",
    },
    "ocilake": {
        "class": "ocifs.OCIFileSystem",
        "err": "Install ocifs to access OCI Data Lake",
    },
    "asynclocal": {
        "class": "morefs.asyn_local.AsyncLocalFileSystem",
        "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
    },
    "adl": {
        "class": "adlfs.AzureDatalakeFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen1",
    },
    "abfs": {
        "class": "adlfs.AzureBlobFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
    },
    "az": {
        "class": "adlfs.AzureBlobFileSystem",
        "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
    },
    "cached": {"class": "fsspec.implementations.cached.CachingFileSystem"},
    "blockcache": {"class": "fsspec.implementations.cached.CachingFileSystem"},
    "filecache": {"class": "fsspec.implementations.cached.WholeFileCacheFileSystem"},
    "simplecache": {"class": "fsspec.implementations.cached.SimpleCacheFileSystem"},
    "dask": {
        "class": "fsspec.implementations.dask.DaskWorkerFileSystem",
        "err": "Install dask distributed to access worker file system",
    },
    "dbfs": {
        "class": "fsspec.implementations.dbfs.DatabricksFileSystem",
        "err": "Install the requests package to use the DatabricksFileSystem",
    },
    "github": {
        "class": "fsspec.implementations.github.GithubFileSystem",
        "err": "Install the requests package to use the github FS",
    },
    "git": {
        "class": "fsspec.implementations.git.GitFileSystem",
        "err": "Install pygit2 to browse local git repos",
    },
    "smb": {
        "class": "fsspec.implementations.smb.SMBFileSystem",
        "err": 'SMB requires "smbprotocol" or "smbprotocol[kerberos]" installed',
    },
    "jupyter": {
        "class": "fsspec.implementations.jupyter.JupyterFileSystem",
        "err": "Jupyter FS requires requests to be installed",
    },
    "jlab": {
        "class": "fsspec.implementations.jupyter.JupyterFileSystem",
        "err": "Jupyter FS requires requests to be installed",
    },
    "libarchive": {
        "class": "fsspec.implementations.libarchive.LibArchiveFileSystem",
        "err": 'LibArchive requires the "libarchive" package to be installed',
    },
    "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"},
    "generic": {"class": "fsspec.generic.GenericFileSystem"},
    "oss": {
        "class": "ossfs.OSSFileSystem",
        "err": "Install ossfs to access Alibaba Object Storage System",
    },
    "webdav": {
        "class": "webdav4.fsspec.WebdavFileSystem",
        "err": "Install webdav4 to access WebDAV",
    },
    "dvc": {
        "class": "dvc.api.DVCFileSystem",
        "err": "Install dvc to access DVCFileSystem",
    },
    "hf": {
        "class": "huggingface_hub.HfFileSystem",
        "err": "Install huggingface_hub to access HfFileSystem",
    },
    "root": {
        "class": "fsspec_xrootd.XRootDFileSystem",
        "err": "Install fsspec-xrootd to access xrootd storage system."
        + " Note: 'root' is the protocol name for xrootd storage systems,"
        + " not referring to root directories",
    },
    "dir": {"class": "fsspec.implementations.dirfs.DirFileSystem"},
    "box": {
        "class": "boxfs.BoxFileSystem",
        "err": "Please install boxfs to access BoxFileSystem",
    },
    "lakefs": {
        "class": "lakefs_spec.LakeFSFileSystem",
        "err": "Please install lakefs-spec to access LakeFSFileSystem",
    },
}

def get_filesystem_class(protocol):
    """Fetch named protocol implementation from the registry

    The dict ``known_implementations`` maps protocol names to the locations
    of classes implementing the corresponding file-system. When used for the
    first time, appropriate imports will happen and the class will be placed in
    the registry. All subsequent calls will fetch directly from the registry.

    Some protocol implementations require additional dependencies, and so the
    import may fail. In this case, the string in the "err" field of the
    ``known_implementations`` will be given as the error message.
    """
    if not protocol:
        protocol = default

    if protocol not in registry:
        if protocol not in known_implementations:
            raise ValueError(f"Protocol not known: {protocol}")
        bit = known_implementations[protocol]
        try:
            register_implementation(protocol, _import_class(bit["class"]))
        except ImportError as e:
            raise ImportError(bit["err"]) from e
    cls = registry[protocol]
    if getattr(cls, "protocol", None) in ("abstract", None):
        cls.protocol = protocol

    return cls
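
For reference, a short sketch of how the functions above are typically exercised. This example is illustrative only, not part of the source file; it assumes fsspec and its bundled memory implementation are importable.

# --- illustrative example, not part of registry.py ---
import fsspec
from fsspec.implementations.memory import MemoryFileSystem
from fsspec.registry import get_filesystem_class, register_implementation

# First lookup imports the class and stores it in the registry;
# subsequent lookups hit the registry directly.
cls = get_filesystem_class("memory")
assert cls is MemoryFileSystem

# Register an already-imported class under an extra protocol name.
register_implementation("mem2", MemoryFileSystem, clobber=True)
fs = fsspec.filesystem("mem2")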

s3_msg = """Your installed version of s3fs is very old and known to cause
severe performance issues, see also https://github.com/dask/dask/issues/10276

To fix, you should specify a lower version bound on s3fs, or
update the current installation.
"""

def _import_class(cls, minv=None):
    """Take a string FQP and return the imported class or identifier

    ``cls`` is of the form "package.module.klass" or "package.module:subobject.klass"
    """
    if ":" in cls:
        mod, name = cls.rsplit(":", 1)
        s3 = mod == "s3fs"
        mod = importlib.import_module(mod)
        if s3 and mod.__version__.split(".") < ["0", "5"]:
            warnings.warn(s3_msg)
        for part in name.split("."):
            mod = getattr(mod, part)
        return mod
    else:
        mod, name = cls.rsplit(".", 1)
        s3 = mod == "s3fs"
        mod = importlib.import_module(mod)
        if s3 and mod.__version__.split(".") < ["0", "5"]:
            warnings.warn(s3_msg)
        return getattr(mod, name)

def filesystem(protocol, **storage_options):
    """Instantiate filesystems for given protocol and arguments

    ``storage_options`` are specific to the protocol being chosen, and are
    passed directly to the class.
    """
    if protocol == "arrow_hdfs":
        warnings.warn(
            "The 'arrow_hdfs' protocol has been deprecated and will be "
            "removed in the future. Specify it as 'hdfs'.",
            DeprecationWarning,
        )

    cls = get_filesystem_class(protocol)
    return cls(**storage_options)


def available_protocols():
    """Return a list of the implemented protocols.

    Note that any given protocol may require extra packages to be importable.
    """
    return list(known_implementations)
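
A quick sketch of the two public entry points defined above; illustrative only, and the commented output is indicative:

# --- illustrative example, not part of registry.py ---
import fsspec
from fsspec.registry import available_protocols

print("s3" in available_protocols())  # True, even if s3fs is not installed

# storage_options are passed straight through to the implementation class.
fs = fsspec.filesystem("memory")
fs.pipe_file("/demo.txt", b"hello")
print(fs.cat_file("/demo.txt"))  # b'hello'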
lib/python3.11/site-packages/fsspec/spec.py
ADDED
@@ -0,0 +1,1963 @@
from __future__ import annotations

import io
import logging
import os
import threading
import warnings
import weakref
from errno import ESPIPE
from glob import has_magic
from hashlib import sha256
from typing import ClassVar

from .callbacks import _DEFAULT_CALLBACK
from .config import apply_config, conf
from .dircache import DirCache
from .transaction import Transaction
from .utils import (
    _unstrip_protocol,
    glob_translate,
    isfilelike,
    other_paths,
    read_block,
    stringify_path,
    tokenize,
)

logger = logging.getLogger("fsspec")


def make_instance(cls, args, kwargs):
    return cls(*args, **kwargs)

class _Cached(type):
    """
    Metaclass for caching file system instances.

    Notes
    -----
    Instances are cached according to

    * The values of the class attributes listed in `_extra_tokenize_attributes`
    * The arguments passed to ``__init__``.

    This creates an additional reference to the filesystem, which prevents the
    filesystem from being garbage collected when all *user* references go away.
    A call to the :meth:`AbstractFileSystem.clear_instance_cache` must *also*
    be made for a filesystem instance to be garbage collected.
    """

    def __init__(cls, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Note: we intentionally create a reference here, to avoid garbage
        # collecting instances when all other references are gone. To really
        # delete a FileSystem, the cache must be cleared.
        if conf.get("weakref_instance_cache"):  # pragma: no cover
            # debug option for analysing fork/spawn conditions
            cls._cache = weakref.WeakValueDictionary()
        else:
            cls._cache = {}
        cls._pid = os.getpid()

    def __call__(cls, *args, **kwargs):
        kwargs = apply_config(cls, kwargs)
        extra_tokens = tuple(
            getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes
        )
        token = tokenize(
            cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs
        )
        skip = kwargs.pop("skip_instance_cache", False)
        if os.getpid() != cls._pid:
            cls._cache.clear()
            cls._pid = os.getpid()
        if not skip and cls.cachable and token in cls._cache:
            cls._latest = token
            return cls._cache[token]
        else:
            obj = super().__call__(*args, **kwargs)
            # Setting _fs_token here causes some static linters to complain.
            obj._fs_token_ = token
            obj.storage_args = args
            obj.storage_options = kwargs
            if obj.async_impl and obj.mirror_sync_methods:
                from .asyn import mirror_sync_methods

                mirror_sync_methods(obj)

            if cls.cachable and not skip:
                cls._latest = token
                cls._cache[token] = obj
            return obj

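A small sketch of the caching behaviour implemented by `_Cached`, using the built-in memory filesystem (illustrative only, not part of the source file):

# --- illustrative example, not part of spec.py ---
import fsspec

# Identical constructor arguments tokenize to the same key, so the
# metaclass returns the cached instance.
fs1 = fsspec.filesystem("memory")
fs2 = fsspec.filesystem("memory")
assert fs1 is fs2

# skip_instance_cache forces a fresh, uncached instance.
fs3 = fsspec.filesystem("memory", skip_instance_cache=True)
assert fs3 is not fs1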
class AbstractFileSystem(metaclass=_Cached):
    """
    An abstract super-class for pythonic file-systems

    Implementations are expected to be compatible with or, better, subclass
    from here.
    """

    cachable = True  # this class can be cached, instances reused
    _cached = False
    blocksize = 2**22
    sep = "/"
    protocol: ClassVar[str | tuple[str, ...]] = "abstract"
    _latest = None
    async_impl = False
    mirror_sync_methods = False
    root_marker = ""  # For some FSs, may require leading '/' or other character
    transaction_type = Transaction

    #: Extra *class attributes* that should be considered when hashing.
    _extra_tokenize_attributes = ()

    def __init__(self, *args, **storage_options):
        """Create and configure file-system instance

        Instances may be cachable, so if similar enough arguments are seen
        a new instance is not required. The token attribute exists to allow
        implementations to cache instances if they wish.

        A reasonable default should be provided if there are no arguments.

        Subclasses should call this method.

        Parameters
        ----------
        use_listings_cache, listings_expiry_time, max_paths:
            passed to ``DirCache``, if the implementation supports
            directory listing caching. Pass use_listings_cache=False
            to disable such caching.
        skip_instance_cache: bool
            If this is a cachable implementation, pass True here to force
            creating a new instance even if a matching instance exists, and prevent
            storing this instance.
        asynchronous: bool
        loop: asyncio-compatible IOLoop or None
        """
        if self._cached:
            # reusing instance, don't change
            return
        self._cached = True
        self._intrans = False
        self._transaction = None
        self._invalidated_caches_in_transaction = []
        self.dircache = DirCache(**storage_options)

        if storage_options.pop("add_docs", None):
            warnings.warn("add_docs is no longer supported.", FutureWarning)

        if storage_options.pop("add_aliases", None):
            warnings.warn("add_aliases has been removed.", FutureWarning)
        # This is set in _Cached
        self._fs_token_ = None

    @property
    def fsid(self):
        """Persistent filesystem id that can be used to compare filesystems
        across sessions.
        """
        raise NotImplementedError

    @property
    def _fs_token(self):
        return self._fs_token_

    def __dask_tokenize__(self):
        return self._fs_token

    def __hash__(self):
        return int(self._fs_token, 16)

    def __eq__(self, other):
        return isinstance(other, type(self)) and self._fs_token == other._fs_token

    def __reduce__(self):
        return make_instance, (type(self), self.storage_args, self.storage_options)

    @classmethod
    def _strip_protocol(cls, path):
        """Turn path from fully-qualified to file-system-specific

        May require FS-specific handling, e.g., for relative paths or links.
        """
        if isinstance(path, list):
            return [cls._strip_protocol(p) for p in path]
        path = stringify_path(path)
        protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol
        for protocol in protos:
            if path.startswith(protocol + "://"):
                path = path[len(protocol) + 3 :]
            elif path.startswith(protocol + "::"):
                path = path[len(protocol) + 2 :]
        path = path.rstrip("/")
        # use of root_marker to make minimum required path, e.g., "/"
        return path or cls.root_marker

    def unstrip_protocol(self, name: str) -> str:
        """Format FS-specific path to generic, including protocol"""
        protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol
        for protocol in protos:
            if name.startswith(f"{protocol}://"):
                return name
        return f"{protos[0]}://{name}"

    @staticmethod
    def _get_kwargs_from_urls(path):
        """If kwargs can be encoded in the paths, extract them here

        This should happen before instantiation of the class; incoming paths
        then should be amended to strip the options in methods.

        Examples may look like an sftp path "sftp://user@host:/my/path", where
        the user and host should become kwargs and later get stripped.
        """
        # by default, nothing happens
        return {}

    @classmethod
    def current(cls):
        """Return the most recently instantiated FileSystem

        If no instance has been created, then create one with defaults
        """
        if cls._latest in cls._cache:
            return cls._cache[cls._latest]
        return cls()

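To illustrate `_strip_protocol`/`unstrip_protocol`, a minimal sketch with a hypothetical subclass; `DemoFS` is invented for this example and does not exist in fsspec:

# --- illustrative example, not part of spec.py ---
from fsspec.spec import AbstractFileSystem

class DemoFS(AbstractFileSystem):  # hypothetical subclass for illustration
    protocol = "demo"

# the protocol prefix is stripped on the way in...
assert DemoFS._strip_protocol("demo://bucket/key") == "bucket/key"
# ...and re-attached on the way out
assert DemoFS().unstrip_protocol("bucket/key") == "demo://bucket/key"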
    @property
    def transaction(self):
        """A context within which files are committed together upon exit

        Requires the file class to implement `.commit()` and `.discard()`
        for the normal and exception cases.
        """
        if self._transaction is None:
            self._transaction = self.transaction_type(self)
        return self._transaction

    def start_transaction(self):
        """Begin write transaction for deferring files, non-context version"""
        self._intrans = True
        self._transaction = self.transaction_type(self)
        return self.transaction

    def end_transaction(self):
        """Finish write transaction, non-context version"""
        self.transaction.complete()
        self._transaction = None
        # The invalidated cache must be cleared after the transaction is completed.
        for path in self._invalidated_caches_in_transaction:
            self.invalidate_cache(path)
        self._invalidated_caches_in_transaction.clear()

    def invalidate_cache(self, path=None):
        """
        Discard any cached directory information

        Parameters
        ----------
        path: string or None
            If None, clear all cached listings; otherwise, clear listings at
            or under the given path.
        """
        # It is not necessary to implement an invalidation mechanism; the
        # filesystem may have no cache. But if it does, your subclass should
        # call this method of the parent class to ensure caches expire
        # correctly after transactions. See the implementation of
        # FTPFileSystem in ftp.py.
        if self._intrans:
            self._invalidated_caches_in_transaction.append(path)

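A minimal sketch of the transaction machinery above, assuming a backend whose file class implements ``commit``/``discard`` (the built-in memory filesystem does):

# --- illustrative example, not part of spec.py ---
import fsspec

fs = fsspec.filesystem("memory")
with fs.transaction:
    with fs.open("/staged.txt", "wb") as f:
        f.write(b"data")
    # the write is deferred; commit happens when the block exits cleanly,
    # discard if an exception propagates out of it
print(fs.cat_file("/staged.txt"))  # b'data'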
    def mkdir(self, path, create_parents=True, **kwargs):
        """
        Create directory entry at path

        For systems that don't have true directories, may create an entry for
        this instance only and not touch the real filesystem

        Parameters
        ----------
        path: str
            location
        create_parents: bool
            if True, this is equivalent to ``makedirs``
        kwargs:
            may be permissions, etc.
        """
        pass  # not necessary to implement, may not have directories

    def makedirs(self, path, exist_ok=False):
        """Recursively make directories

        Creates directory at path and any intervening required directories.
        Raises exception if, for instance, the path already exists but is a
        file.

        Parameters
        ----------
        path: str
            leaf directory name
        exist_ok: bool (False)
            If False, will error if the target already exists
        """
        pass  # not necessary to implement, may not have directories

    def rmdir(self, path):
        """Remove a directory, if empty"""
        pass  # not necessary to implement, may not have directories

    def ls(self, path, detail=True, **kwargs):
        """List objects at path.

        This should include subdirectories and files at that location. The
        difference between a file and a directory must be clear when details
        are requested.

        The specific keys, or perhaps a FileInfo class, or similar, is TBD,
        but must be consistent across implementations.
        Must include:

        - full path to the entry (without protocol)
        - size of the entry, in bytes. If the value cannot be determined, will
          be ``None``.
        - type of entry, "file", "directory" or other

        Additional information
        may be present, appropriate to the file-system, e.g., generation,
        checksum, etc.

        May use refresh=True|False to allow use of self._ls_from_cache to
        check for a saved listing and avoid calling the backend. This would be
        common where listing may be expensive.

        Parameters
        ----------
        path: str
        detail: bool
            if True, gives a list of dictionaries, where each is the same as
            the result of ``info(path)``. If False, gives a list of paths
            (str).
        kwargs: may have additional backend-specific options, such as version
            information

        Returns
        -------
        List of strings if detail is False, or list of directory information
        dicts if detail is True.
        """
        raise NotImplementedError

    def _ls_from_cache(self, path):
        """Check cache for listing

        Returns listing, if found (may be an empty list for a directory that
        exists but contains nothing), None if not in cache.
        """
        parent = self._parent(path)
        if path.rstrip("/") in self.dircache:
            return self.dircache[path.rstrip("/")]
        try:
            files = [
                f
                for f in self.dircache[parent]
                if f["name"] == path
                or (f["name"] == path.rstrip("/") and f["type"] == "directory")
            ]
            if len(files) == 0:
                # parent dir was listed but did not contain this file
                raise FileNotFoundError(path)
            return files
        except KeyError:
            pass

    def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs):
        """Return all files below path

        List all files, recursing into subdirectories; output is iterator-style,
        like ``os.walk()``. For a simple list of files, ``find()`` is available.

        When topdown is True, the caller can modify the dirnames list in-place (perhaps
        using del or slice assignment), and walk() will
        only recurse into the subdirectories whose names remain in dirnames;
        this can be used to prune the search, impose a specific order of visiting,
        or even to inform walk() about directories the caller creates or renames before
        it resumes walk() again.
        Modifying dirnames when topdown is False has no effect. (see os.walk)

        Note that the "files" outputted will include anything that is not
        a directory, such as links.

        Parameters
        ----------
        path: str
            Root to recurse into
        maxdepth: int
            Maximum recursion depth. None means limitless, but not recommended
            on link-based file-systems.
        topdown: bool (True)
            Whether to walk the directory tree from the top downwards or from
            the bottom upwards.
        on_error: "omit", "raise", a callable
            if omit (default), paths with exceptions will simply be empty;
            if raise, the underlying exception will be raised;
            if callable, it will be called with a single OSError instance as argument
        kwargs: passed to ``ls``
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        path = self._strip_protocol(path)
        full_dirs = {}
        dirs = {}
        files = {}

        detail = kwargs.pop("detail", False)
        try:
            listing = self.ls(path, detail=True, **kwargs)
        except (FileNotFoundError, OSError) as e:
            if on_error == "raise":
                raise
            elif callable(on_error):
                on_error(e)
            if detail:
                return path, {}, {}
            return path, [], []

        for info in listing:
            # each info name must be at least [path]/part , but here
            # we check also for names like [path]/part/
            pathname = info["name"].rstrip("/")
            name = pathname.rsplit("/", 1)[-1]
            if info["type"] == "directory" and pathname != path:
                # do not include "self" path
                full_dirs[name] = pathname
                dirs[name] = info
            elif pathname == path:
                # file-like with same name as given path
                files[""] = info
            else:
                files[name] = info

        if not detail:
            dirs = list(dirs)
            files = list(files)

        if topdown:
            # Yield before recursion if walking top down
            yield path, dirs, files

        if maxdepth is not None:
            maxdepth -= 1
            if maxdepth < 1:
                if not topdown:
                    yield path, dirs, files
                return

        for d in dirs:
            yield from self.walk(
                full_dirs[d],
                maxdepth=maxdepth,
                detail=detail,
                topdown=topdown,
                **kwargs,
            )

        if not topdown:
            # Yield after recursion if walking bottom up
            yield path, dirs, files

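A sketch of iterating ``walk`` (illustrative; the memory filesystem infers intermediate directories from file paths):

# --- illustrative example, not part of spec.py ---
import fsspec

fs = fsspec.filesystem("memory")
fs.pipe({"/root/a.txt": b"A", "/root/sub/b.txt": b"B"})
for dirpath, dirnames, filenames in fs.walk("/root"):
    print(dirpath, dirnames, filenames)
# expected output, roughly:
#   /root ['sub'] ['a.txt']
#   /root/sub [] ['b.txt']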
    def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
        """List all files below path.

        Like posix ``find`` command without conditions

        Parameters
        ----------
        path : str
        maxdepth: int or None
            If not None, the maximum number of levels to descend
        withdirs: bool
            Whether to include directory paths in the output. This is True
            when used by glob, but users usually only want files.
        kwargs are passed to ``ls``.
        """
        # TODO: allow equivalent of -name parameter
        path = self._strip_protocol(path)
        out = {}

        # Add the root directory if withdirs is requested
        # This is needed for posix glob compliance
        if withdirs and path != "" and self.isdir(path):
            out[path] = self.info(path)

        for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs):
            if withdirs:
                files.update(dirs)
            out.update({info["name"]: info for name, info in files.items()})
        if not out and self.isfile(path):
            # walk works on directories, but find should also return [path]
            # when path happens to be a file
            out[path] = {}
        names = sorted(out)
        if not detail:
            return names
        else:
            return {name: out[name] for name in names}

    def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
        """Space used by files and optionally directories within a path

        Directory size does not include the size of its contents.

        Parameters
        ----------
        path: str
        total: bool
            Whether to sum all the file sizes
        maxdepth: int or None
            Maximum number of directory levels to descend, None for unlimited.
        withdirs: bool
            Whether to include directory paths in the output.
        kwargs: passed to ``find``

        Returns
        -------
        Dict of {path: size} if total=False, or int otherwise, where numbers
        refer to bytes used.
        """
        sizes = {}
        if withdirs and self.isdir(path):
            # Include top-level directory in output
            info = self.info(path)
            sizes[info["name"]] = info["size"]
        for f in self.find(path, maxdepth=maxdepth, withdirs=withdirs, **kwargs):
            info = self.info(f)
            sizes[info["name"]] = info["size"]
        if total:
            return sum(sizes.values())
        else:
            return sizes

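``find`` and ``du`` compose directly on ``walk``; a short sketch (outputs in comments are indicative):

# --- illustrative example, not part of spec.py ---
import fsspec

fs = fsspec.filesystem("memory")
fs.pipe({"/data/x.bin": b"12345", "/data/sub/y.bin": b"678"})
print(fs.find("/data"))             # ['/data/sub/y.bin', '/data/x.bin']
print(fs.du("/data"))               # 8
print(fs.du("/data", total=False))  # {'/data/sub/y.bin': 3, '/data/x.bin': 5}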
    def glob(self, path, maxdepth=None, **kwargs):
        """
        Find files by glob-matching.

        If the path ends with '/', only folders are returned.

        We support ``"**"``,
        ``"?"`` and ``"[..]"``. We do not support ^ for pattern negation.

        The `maxdepth` option is applied on the first `**` found in the path.

        kwargs are passed to ``ls``.
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        import re

        seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
        ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
        path = self._strip_protocol(path)
        append_slash_to_dirname = ends_with_sep or path.endswith(
            tuple(sep + "**" for sep in seps)
        )
        idx_star = path.find("*") if path.find("*") >= 0 else len(path)
        idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
        idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

        min_idx = min(idx_star, idx_qmark, idx_brace)

        detail = kwargs.pop("detail", False)

        if not has_magic(path):
            if self.exists(path, **kwargs):
                if not detail:
                    return [path]
                else:
                    return {path: self.info(path, **kwargs)}
            else:
                if not detail:
                    return []  # glob of non-existent returns empty
                else:
                    return {}
        elif "/" in path[:min_idx]:
            min_idx = path[:min_idx].rindex("/")
            root = path[: min_idx + 1]
            depth = path[min_idx + 1 :].count("/") + 1
        else:
            root = ""
            depth = path[min_idx + 1 :].count("/") + 1

        if "**" in path:
            if maxdepth is not None:
                idx_double_stars = path.find("**")
                depth_double_stars = path[idx_double_stars:].count("/") + 1
                depth = depth - depth_double_stars + maxdepth
            else:
                depth = None

        allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)

        pattern = glob_translate(path + ("/" if ends_with_sep else ""))
        pattern = re.compile(pattern)

        out = {
            p: info
            for p, info in sorted(allpaths.items())
            if pattern.match(
                (
                    p + "/"
                    if append_slash_to_dirname and info["type"] == "directory"
                    else p
                )
            )
        }

        if detail:
            return out
        else:
            return list(out)

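A sketch of glob-matching as implemented above: a single ``*`` does not cross ``/``, and each ``?`` matches exactly one character (illustrative, outputs indicative):

# --- illustrative example, not part of spec.py ---
import fsspec

fs = fsspec.filesystem("memory")
fs.pipe({"/logs/app.log": b"", "/logs/2024/app.log": b""})
print(fs.glob("/logs/*.log"))       # ['/logs/app.log']
print(fs.glob("/logs/????/*.log"))  # ['/logs/2024/app.log']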
    def exists(self, path, **kwargs):
        """Is there a file at the given path"""
        try:
            self.info(path, **kwargs)
            return True
        except:  # noqa: E722
            # any exception allowed bar FileNotFoundError?
            return False

    def lexists(self, path, **kwargs):
        """If there is a file at the given path (including
        broken links)"""
        return self.exists(path)

    def info(self, path, **kwargs):
        """Give details of entry at path

        Returns a single dictionary, with exactly the same information as ``ls``
        would with ``detail=True``.

        The default implementation calls ``ls`` and could be overridden by a
        shortcut. kwargs are passed on to ``ls()``.

        Some file systems might not be able to measure the file's size, in
        which case, the returned dict will include ``'size': None``.

        Returns
        -------
        dict with keys: name (full path in the FS), size (in bytes), type (file,
        directory, or something else) and other FS-specific keys.
        """
        path = self._strip_protocol(path)
        out = self.ls(self._parent(path), detail=True, **kwargs)
        out = [o for o in out if o["name"].rstrip("/") == path]
        if out:
            return out[0]
        out = self.ls(path, detail=True, **kwargs)
        path = path.rstrip("/")
        out1 = [o for o in out if o["name"].rstrip("/") == path]
        if len(out1) == 1:
            if "size" not in out1[0]:
                out1[0]["size"] = None
            return out1[0]
        elif len(out1) > 1 or out:
            return {"name": path, "size": 0, "type": "directory"}
        else:
            raise FileNotFoundError(path)

    def checksum(self, path):
        """Unique value for current version of file

        If the checksum is the same from one moment to another, the contents
        are guaranteed to be the same. If the checksum changes, the contents
        *might* have changed.

        This should normally be overridden; the default will probably capture
        the creation/modification timestamp (which would be good) or maybe the
        access timestamp (which would be bad)
        """
        return int(tokenize(self.info(path)), 16)

    def size(self, path):
        """Size in bytes of file"""
        return self.info(path).get("size", None)

    def sizes(self, paths):
        """Size in bytes of each file in a list of paths"""
        return [self.size(p) for p in paths]

    def isdir(self, path):
        """Is this entry directory-like?"""
        try:
            return self.info(path)["type"] == "directory"
        except OSError:
            return False

    def isfile(self, path):
        """Is this entry file-like?"""
        try:
            return self.info(path)["type"] == "file"
        except:  # noqa: E722
            return False

    def read_text(self, path, encoding=None, errors=None, newline=None, **kwargs):
        """Get the contents of the file as a string.

        Parameters
        ----------
        path: str
            URL of file on this filesystem
        encoding, errors, newline: same as `open`.
        """
        with self.open(
            path,
            mode="r",
            encoding=encoding,
            errors=errors,
            newline=newline,
            **kwargs,
        ) as f:
            return f.read()

    def write_text(
        self, path, value, encoding=None, errors=None, newline=None, **kwargs
    ):
        """Write the text to the given file.

        An existing file will be overwritten.

        Parameters
        ----------
        path: str
            URL of file on this filesystem
        value: str
            Text to write.
        encoding, errors, newline: same as `open`.
        """
        with self.open(
            path,
            mode="w",
            encoding=encoding,
            errors=errors,
            newline=newline,
            **kwargs,
        ) as f:
            return f.write(value)

    def cat_file(self, path, start=None, end=None, **kwargs):
        """Get the content of a file

        Parameters
        ----------
        path: URL of file on this filesystem
        start, end: int
            Byte limits of the read. If negative, backwards from end,
            like usual python slices. Either can be None for start or
            end of file, respectively
        kwargs: passed to ``open()``.
        """
        # explicitly set buffering off?
        with self.open(path, "rb", **kwargs) as f:
            if start is not None:
                if start >= 0:
                    f.seek(start)
                else:
                    f.seek(max(0, f.size + start))
            if end is not None:
                if end < 0:
                    end = f.size + end
                return f.read(end - f.tell())
            return f.read()

    def pipe_file(self, path, value, **kwargs):
        """Set the bytes of given file"""
        with self.open(path, "wb", **kwargs) as f:
            f.write(value)

    def pipe(self, path, value=None, **kwargs):
        """Put value into path

        (counterpart to ``cat``)

        Parameters
        ----------
        path: string or dict(str, bytes)
            If a string, a single remote location to put ``value`` bytes; if a dict,
            a mapping of {path: bytesvalue}.
        value: bytes, optional
            If using a single path, these are the bytes to put there. Ignored if
            ``path`` is a dict
        """
        if isinstance(path, str):
            self.pipe_file(self._strip_protocol(path), value, **kwargs)
        elif isinstance(path, dict):
            for k, v in path.items():
                self.pipe_file(self._strip_protocol(k), v, **kwargs)
        else:
            raise ValueError("path must be str or dict")

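``pipe_file``/``cat_file`` are the byte-level write/read primitives, and ranges behave like slices; a short sketch (illustrative only):

# --- illustrative example, not part of spec.py ---
import fsspec

fs = fsspec.filesystem("memory")
fs.pipe_file("/greeting.txt", b"hello world")
print(fs.cat_file("/greeting.txt"))           # b'hello world'
print(fs.cat_file("/greeting.txt", start=6))  # b'world'
print(fs.cat_file("/greeting.txt", end=5))    # b'hello'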
    def cat_ranges(
        self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
    ):
        """Get the contents of byte ranges from one or more files

        Parameters
        ----------
        paths: list
            A list of filepaths on this filesystem
        starts, ends: int or list
            Byte limits of the read. If using a single int, the same value will be
            used to read all the specified files.
        """
        if max_gap is not None:
            raise NotImplementedError
        if not isinstance(paths, list):
            raise TypeError
        if not isinstance(starts, list):
            starts = [starts] * len(paths)
        if not isinstance(ends, list):
            ends = [ends] * len(paths)
        if len(starts) != len(paths) or len(ends) != len(paths):
            raise ValueError
        out = []
        for p, s, e in zip(paths, starts, ends):
            try:
                out.append(self.cat_file(p, s, e))
            except Exception as e:
                if on_error == "return":
                    out.append(e)
                else:
                    raise
        return out

    def cat(self, path, recursive=False, on_error="raise", **kwargs):
        """Fetch (potentially multiple) paths' contents

        Parameters
        ----------
        recursive: bool
            If True, assume the path(s) are directories, and get all the
            contained files
        on_error : "raise", "omit", "return"
            If raise, an underlying exception will be raised (converted to KeyError
            if the type is in self.missing_exceptions); if omit, keys with exception
            will simply not be included in the output; if "return", all keys are
            included in the output, but the value will be bytes or an exception
            instance.
        kwargs: passed to cat_file

        Returns
        -------
        dict of {path: contents} if there are multiple paths
        or the path has been otherwise expanded
        """
        paths = self.expand_path(path, recursive=recursive)
        if (
            len(paths) > 1
            or isinstance(path, list)
            or paths[0] != self._strip_protocol(path)
        ):
            out = {}
            for path in paths:
                try:
                    out[path] = self.cat_file(path, **kwargs)
                except Exception as e:
                    if on_error == "raise":
                        raise
                    if on_error == "return":
                        out[path] = e
            return out
        else:
            return self.cat_file(paths[0], **kwargs)

    def get_file(
        self, rpath, lpath, callback=_DEFAULT_CALLBACK, outfile=None, **kwargs
    ):
        """Copy single remote file to local"""
        from .implementations.local import LocalFileSystem

        if isfilelike(lpath):
            outfile = lpath
        elif self.isdir(rpath):
            os.makedirs(lpath, exist_ok=True)
            return None

        fs = LocalFileSystem(auto_mkdir=True)
        fs.makedirs(fs._parent(lpath), exist_ok=True)

        with self.open(rpath, "rb", **kwargs) as f1:
            if outfile is None:
                outfile = open(lpath, "wb")

            try:
                callback.set_size(getattr(f1, "size", None))
                data = True
                while data:
                    data = f1.read(self.blocksize)
                    segment_len = outfile.write(data)
                    if segment_len is None:
                        segment_len = len(data)
                    callback.relative_update(segment_len)
            finally:
                if not isfilelike(lpath):
                    outfile.close()

    def get(
        self,
        rpath,
        lpath,
        recursive=False,
        callback=_DEFAULT_CALLBACK,
        maxdepth=None,
        **kwargs,
    ):
        """Copy file(s) to local.

        Copies a specific file or tree of files (if recursive=True). If lpath
        ends with a "/", it will be assumed to be a directory, and target files
        will go within. Can submit a list of paths, which may be glob-patterns
        and will be expanded.

        Calls get_file for each source.
        """
        if isinstance(lpath, list) and isinstance(rpath, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            rpaths = rpath
            lpaths = lpath
        else:
            from .implementations.local import (
                LocalFileSystem,
                make_path_posix,
                trailing_sep,
            )

            source_is_str = isinstance(rpath, str)
            rpaths = self.expand_path(rpath, recursive=recursive, maxdepth=maxdepth)
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))]
                if not rpaths:
                    return

            if isinstance(lpath, str):
                lpath = make_path_posix(lpath)

            source_is_file = len(rpaths) == 1
            dest_is_dir = isinstance(lpath, str) and (
                trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
            )

            exists = source_is_str and (
                (has_magic(rpath) and source_is_file)
                or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath))
            )
            lpaths = other_paths(
                rpaths,
                lpath,
                exists=exists,
                flatten=not source_is_str,
            )

        callback.set_size(len(lpaths))
        for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
            callback.branch(rpath, lpath, kwargs)
            self.get_file(rpath, lpath, **kwargs)

def put_file(self, lpath, rpath, callback=_DEFAULT_CALLBACK, **kwargs):
|
| 974 |
+
"""Copy single file to remote"""
|
| 975 |
+
if os.path.isdir(lpath):
|
| 976 |
+
self.makedirs(rpath, exist_ok=True)
|
| 977 |
+
return None
|
| 978 |
+
|
| 979 |
+
with open(lpath, "rb") as f1:
|
| 980 |
+
size = f1.seek(0, 2)
|
| 981 |
+
callback.set_size(size)
|
| 982 |
+
f1.seek(0)
|
| 983 |
+
|
| 984 |
+
self.mkdirs(self._parent(os.fspath(rpath)), exist_ok=True)
|
| 985 |
+
with self.open(rpath, "wb", **kwargs) as f2:
|
| 986 |
+
while f1.tell() < size:
|
| 987 |
+
data = f1.read(self.blocksize)
|
| 988 |
+
segment_len = f2.write(data)
|
| 989 |
+
if segment_len is None:
|
| 990 |
+
segment_len = len(data)
|
| 991 |
+
callback.relative_update(segment_len)
|
| 992 |
+
|
| 993 |
+
def put(
|
| 994 |
+
self,
|
| 995 |
+
lpath,
|
| 996 |
+
rpath,
|
| 997 |
+
recursive=False,
|
| 998 |
+
callback=_DEFAULT_CALLBACK,
|
| 999 |
+
maxdepth=None,
|
| 1000 |
+
**kwargs,
|
| 1001 |
+
):
|
| 1002 |
+
"""Copy file(s) from local.
|
| 1003 |
+
|
| 1004 |
+
Copies a specific file or tree of files (if recursive=True). If rpath
|
| 1005 |
+
ends with a "/", it will be assumed to be a directory, and target files
|
| 1006 |
+
will go within.
|
| 1007 |
+
|
| 1008 |
+
Calls put_file for each source.
|
| 1009 |
+
"""
|
| 1010 |
+
if isinstance(lpath, list) and isinstance(rpath, list):
|
| 1011 |
+
# No need to expand paths when both source and destination
|
| 1012 |
+
# are provided as lists
|
| 1013 |
+
rpaths = rpath
|
| 1014 |
+
lpaths = lpath
|
| 1015 |
+
else:
|
| 1016 |
+
from .implementations.local import (
|
| 1017 |
+
LocalFileSystem,
|
| 1018 |
+
make_path_posix,
|
| 1019 |
+
trailing_sep,
|
| 1020 |
+
)
|
| 1021 |
+
|
| 1022 |
+
source_is_str = isinstance(lpath, str)
|
| 1023 |
+
if source_is_str:
|
| 1024 |
+
lpath = make_path_posix(lpath)
|
| 1025 |
+
fs = LocalFileSystem()
|
| 1026 |
+
lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
|
| 1027 |
+
if source_is_str and (not recursive or maxdepth is not None):
|
| 1028 |
+
# Non-recursive glob does not copy directories
|
| 1029 |
+
lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
|
| 1030 |
+
if not lpaths:
|
| 1031 |
+
return
|
| 1032 |
+
|
| 1033 |
+
source_is_file = len(lpaths) == 1
|
| 1034 |
+
dest_is_dir = isinstance(rpath, str) and (
|
| 1035 |
+
trailing_sep(rpath) or self.isdir(rpath)
|
| 1036 |
+
)
|
| 1037 |
+
|
| 1038 |
+
rpath = (
|
| 1039 |
+
self._strip_protocol(rpath)
|
| 1040 |
+
if isinstance(rpath, str)
|
| 1041 |
+
else [self._strip_protocol(p) for p in rpath]
|
| 1042 |
+
)
|
| 1043 |
+
exists = source_is_str and (
|
| 1044 |
+
(has_magic(lpath) and source_is_file)
|
| 1045 |
+
or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
|
| 1046 |
+
)
|
| 1047 |
+
rpaths = other_paths(
|
| 1048 |
+
lpaths,
|
| 1049 |
+
rpath,
|
| 1050 |
+
exists=exists,
|
| 1051 |
+
flatten=not source_is_str,
|
| 1052 |
+
)
|
| 1053 |
+
|
| 1054 |
+
callback.set_size(len(rpaths))
|
| 1055 |
+
for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
|
| 1056 |
+
callback.branch(lpath, rpath, kwargs)
|
| 1057 |
+
self.put_file(lpath, rpath, **kwargs)
|
| 1058 |
+
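
    # Added commentary (not part of fsspec): ``put`` mirrors ``get`` in the
    # opposite direction; a trailing "/" marks the remote target as a directory.
    #
    #     fs.put("data/", "dest/data/", recursive=True)            # local tree -> remote
    #     fs.put(["a.txt", "b.txt"], ["/up/a.txt", "/up/b.txt"])   # paired lists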

    def head(self, path, size=1024):
        """Get the first ``size`` bytes from file"""
        with self.open(path, "rb") as f:
            return f.read(size)

    def tail(self, path, size=1024):
        """Get the last ``size`` bytes from file"""
        with self.open(path, "rb") as f:
            f.seek(max(-size, -f.size), 2)
            return f.read()

    def cp_file(self, path1, path2, **kwargs):
        raise NotImplementedError

    def copy(
        self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs
    ):
        """Copy between two locations in the filesystem

        on_error : "raise", "ignore"
            If "raise", any not-found exceptions will be raised; if "ignore",
            any not-found exceptions will cause the path to be skipped.
            Defaults to "raise" unless recursive is true, in which case the
            default is "ignore".
        """
        if on_error is None and recursive:
            on_error = "ignore"
        elif on_error is None:
            on_error = "raise"

        if isinstance(path1, list) and isinstance(path2, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            paths1 = path1
            paths2 = path2
        else:
            from .implementations.local import trailing_sep

            source_is_str = isinstance(path1, str)
            paths1 = self.expand_path(path1, recursive=recursive, maxdepth=maxdepth)
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                paths1 = [p for p in paths1 if not (trailing_sep(p) or self.isdir(p))]
                if not paths1:
                    return

            source_is_file = len(paths1) == 1
            dest_is_dir = isinstance(path2, str) and (
                trailing_sep(path2) or self.isdir(path2)
            )

            exists = source_is_str and (
                (has_magic(path1) and source_is_file)
                or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
            )
            paths2 = other_paths(
                paths1,
                path2,
                exists=exists,
                flatten=not source_is_str,
            )

        for p1, p2 in zip(paths1, paths2):
            try:
                self.cp_file(p1, p2, **kwargs)
            except FileNotFoundError:
                if on_error == "raise":
                    raise

    def expand_path(self, path, recursive=False, maxdepth=None, **kwargs):
        """Turn one or more globs or directories into a list of all matching paths
        to files or directories.

        kwargs are passed to ``glob`` or ``find``, which may in turn call ``ls``
        """

        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        if isinstance(path, str):
            out = self.expand_path([path], recursive, maxdepth)
        else:
            out = set()
            path = [self._strip_protocol(p) for p in path]
            for p in path:
                if has_magic(p):
                    bit = set(self.glob(p, maxdepth=maxdepth, **kwargs))
                    out |= bit
                    if recursive:
                        # glob call above expanded one depth so if maxdepth is defined
                        # then decrement it in expand_path call below. If it is zero
                        # after decrementing then avoid expand_path call.
                        if maxdepth is not None and maxdepth <= 1:
                            continue
                        out |= set(
                            self.expand_path(
                                list(bit),
                                recursive=recursive,
                                maxdepth=maxdepth - 1 if maxdepth is not None else None,
                                **kwargs,
                            )
                        )
                    continue
                elif recursive:
                    rec = set(
                        self.find(
                            p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs
                        )
                    )
                    out |= rec
                if p not in out and (recursive is False or self.exists(p)):
                    # should only check once, for the root
                    out.add(p)
        if not out:
            raise FileNotFoundError(path)
        return sorted(out)
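
    # Added commentary (not part of fsspec): ``expand_path`` is the common
    # front-end of copy/get/put/rm, turning globs and directories into
    # concrete sorted paths, e.g.
    #
    #     fs.expand_path("/data/*.csv")             # glob expansion
    #     fs.expand_path("/data", recursive=True)   # whole tree, incl. directories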

    def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
        """Move file(s) from one location to another"""
        if path1 == path2:
            logger.debug("%s mv: The paths are the same, so no files were moved.", self)
        else:
            self.copy(path1, path2, recursive=recursive, maxdepth=maxdepth)
            self.rm(path1, recursive=recursive)

    def rm_file(self, path):
        """Delete a file"""
        self._rm(path)

    def _rm(self, path):
        """Delete one file"""
        # this is the old name for the method, prefer rm_file
        raise NotImplementedError

    def rm(self, path, recursive=False, maxdepth=None):
        """Delete files.

        Parameters
        ----------
        path: str or list of str
            File(s) to delete.
        recursive: bool
            If file(s) are directories, recursively delete contents and then
            also remove the directory
        maxdepth: int or None
            Depth to pass to walk for finding files to delete, if recursive.
            If None, there will be no limit and infinite recursion may be
            possible.
        """
        path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
        for p in reversed(path):
            self.rm_file(p)

    @classmethod
    def _parent(cls, path):
        path = cls._strip_protocol(path)
        if "/" in path:
            parent = path.rsplit("/", 1)[0].lstrip(cls.root_marker)
            return cls.root_marker + parent
        else:
            return cls.root_marker

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        """Return raw bytes-mode file-like from the file-system"""
        return AbstractBufferedFile(
            self,
            path,
            mode,
            block_size,
            autocommit,
            cache_options=cache_options,
            **kwargs,
        )

    def open(
        self,
        path,
        mode="rb",
        block_size=None,
        cache_options=None,
        compression=None,
        **kwargs,
    ):
        """
        Return a file-like object from the filesystem

        The resultant instance must function correctly within a ``with``
        context block.

        Parameters
        ----------
        path: str
            Target file
        mode: str like 'rb', 'w'
            See builtin ``open()``
        block_size: int
            Some indication of buffering; this is a value in bytes
        cache_options : dict, optional
            Extra arguments to pass through to the cache.
        compression: string or None
            If given, open file using compression codec. Can either be a compression
            name (a key in ``fsspec.compression.compr``) or "infer" to guess the
            compression from the filename suffix.
        encoding, errors, newline: passed on to TextIOWrapper for text mode
        """
        import io

        path = self._strip_protocol(path)
        if "b" not in mode:
            mode = mode.replace("t", "") + "b"

            text_kwargs = {
                k: kwargs.pop(k)
                for k in ["encoding", "errors", "newline"]
                if k in kwargs
            }
            return io.TextIOWrapper(
                self.open(
                    path,
                    mode,
                    block_size=block_size,
                    cache_options=cache_options,
                    compression=compression,
                    **kwargs,
                ),
                **text_kwargs,
            )
        else:
            ac = kwargs.pop("autocommit", not self._intrans)
            f = self._open(
                path,
                mode=mode,
                block_size=block_size,
                autocommit=ac,
                cache_options=cache_options,
                **kwargs,
            )
            if compression is not None:
                from fsspec.compression import compr
                from fsspec.core import get_compression

                compression = get_compression(path, compression)
                compress = compr[compression]
                f = compress(f, mode=mode[0])

            if not ac and "r" not in mode:
                self.transaction.files.append(f)
            return f
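
    # Added commentary (not part of fsspec): text mode and compression both
    # wrap the underlying binary stream returned by ``_open``, e.g.
    #
    #     with fs.open("/logs/app.log.gz", "rt", compression="infer") as f:
    #         first_line = f.readline()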

    def touch(self, path, truncate=True, **kwargs):
        """Create empty file, or update timestamp

        Parameters
        ----------
        path: str
            file location
        truncate: bool
            If True, always set file size to 0; if False, update timestamp and
            leave file unchanged, if backend allows this
        """
        if truncate or not self.exists(path):
            with self.open(path, "wb", **kwargs):
                pass
        else:
            raise NotImplementedError  # update timestamp, if possible

    def ukey(self, path):
        """Hash of file properties, to tell if it has changed"""
        return sha256(str(self.info(path)).encode()).hexdigest()

    def read_block(self, fn, offset, length, delimiter=None):
        """Read a block of bytes from a file

        Starting at ``offset`` of the file, read ``length`` bytes. If
        ``delimiter`` is set then we ensure that the read starts and stops at
        delimiter boundaries that follow the locations ``offset`` and ``offset
        + length``. If ``offset`` is zero then we start at zero. The
        bytestring returned WILL include the end delimiter string.

        If offset+length is beyond the eof, reads to eof.

        Parameters
        ----------
        fn: string
            Path to filename
        offset: int
            Byte offset to start read
        length: int
            Number of bytes to read. If None, read to end.
        delimiter: bytes (optional)
            Ensure reading starts and stops at delimiter bytestring

        Examples
        --------
        >>> fs.read_block('data/file.csv', 0, 13)  # doctest: +SKIP
        b'Alice, 100\\nBo'
        >>> fs.read_block('data/file.csv', 0, 13, delimiter=b'\\n')  # doctest: +SKIP
        b'Alice, 100\\nBob, 200\\n'

        Use ``length=None`` to read to the end of the file.
        >>> fs.read_block('data/file.csv', 0, None, delimiter=b'\\n')  # doctest: +SKIP
        b'Alice, 100\\nBob, 200\\nCharlie, 300'

        See Also
        --------
        :func:`fsspec.utils.read_block`
        """
        with self.open(fn, "rb") as f:
            size = f.size
            if length is None:
                length = size
            if size is not None and offset + length > size:
                length = size - offset
            return read_block(f, offset, length, delimiter)

    def to_json(self):
        """
        JSON representation of this filesystem instance

        Returns
        -------
        str: JSON structure with keys cls (the python location of this class),
            protocol (text name of this class's protocol, first one in case of
            multiple), args (positional args, usually empty), and all other
            kwargs as their own keys.
        """
        import json

        cls = type(self)
        cls = ".".join((cls.__module__, cls.__name__))
        proto = (
            self.protocol[0]
            if isinstance(self.protocol, (tuple, list))
            else self.protocol
        )
        return json.dumps(
            dict(
                **{"cls": cls, "protocol": proto, "args": self.storage_args},
                **self.storage_options,
            )
        )

    @staticmethod
    def from_json(blob):
        """
        Recreate a filesystem instance from JSON representation

        See ``.to_json()`` for the expected structure of the input

        Parameters
        ----------
        blob: str

        Returns
        -------
        file system instance, not necessarily of this particular class.
        """
        import json

        from .registry import _import_class, get_filesystem_class

        dic = json.loads(blob)
        protocol = dic.pop("protocol")
        try:
            cls = _import_class(dic.pop("cls"))
        except (ImportError, ValueError, RuntimeError, KeyError):
            cls = get_filesystem_class(protocol)
        return cls(*dic.pop("args", ()), **dic)
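
    # Added commentary (not part of fsspec): ``to_json``/``from_json`` give a
    # serializable round trip for filesystem instances, e.g.
    #
    #     fs = fsspec.filesystem("memory")
    #     blob = fs.to_json()
    #     fs2 = AbstractFileSystem.from_json(blob)  # equivalent instance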

    def _get_pyarrow_filesystem(self):
        """
        Make a version of the FS instance which will be acceptable to pyarrow
        """
        # all instances already also derive from pyarrow
        return self

    def get_mapper(self, root="", check=False, create=False, missing_exceptions=None):
        """Create key/value store based on this file-system

        Makes a MutableMapping interface to the FS at the given root path.
        See ``fsspec.mapping.FSMap`` for further details.
        """
        from .mapping import FSMap

        return FSMap(
            root,
            self,
            check=check,
            create=create,
            missing_exceptions=missing_exceptions,
        )

    @classmethod
    def clear_instance_cache(cls):
        """
        Clear the cache of filesystem instances.

        Notes
        -----
        Unless overridden by setting the ``cachable`` class attribute to False,
        the filesystem class stores a reference to newly created instances. This
        prevents Python's normal rules around garbage collection from working,
        since the instance's refcount will not drop to zero until
        ``clear_instance_cache`` is called.
        """
        cls._cache.clear()

    def created(self, path):
        """Return the created timestamp of a file as a datetime.datetime"""
        raise NotImplementedError

    def modified(self, path):
        """Return the modified timestamp of a file as a datetime.datetime"""
        raise NotImplementedError

    # ------------------------------------------------------------------------
    # Aliases

    def read_bytes(self, path, start=None, end=None, **kwargs):
        """Alias of `AbstractFileSystem.cat_file`."""
        return self.cat_file(path, start=start, end=end, **kwargs)

    def write_bytes(self, path, value, **kwargs):
        """Alias of `AbstractFileSystem.pipe_file`."""
        self.pipe_file(path, value, **kwargs)

    def makedir(self, path, create_parents=True, **kwargs):
        """Alias of `AbstractFileSystem.mkdir`."""
        return self.mkdir(path, create_parents=create_parents, **kwargs)

    def mkdirs(self, path, exist_ok=False):
        """Alias of `AbstractFileSystem.makedirs`."""
        return self.makedirs(path, exist_ok=exist_ok)

    def listdir(self, path, detail=True, **kwargs):
        """Alias of `AbstractFileSystem.ls`."""
        return self.ls(path, detail=detail, **kwargs)

    def cp(self, path1, path2, **kwargs):
        """Alias of `AbstractFileSystem.copy`."""
        return self.copy(path1, path2, **kwargs)

    def move(self, path1, path2, **kwargs):
        """Alias of `AbstractFileSystem.mv`."""
        return self.mv(path1, path2, **kwargs)

    def stat(self, path, **kwargs):
        """Alias of `AbstractFileSystem.info`."""
        return self.info(path, **kwargs)

    def disk_usage(self, path, total=True, maxdepth=None, **kwargs):
        """Alias of `AbstractFileSystem.du`."""
        return self.du(path, total=total, maxdepth=maxdepth, **kwargs)

    def rename(self, path1, path2, **kwargs):
        """Alias of `AbstractFileSystem.mv`."""
        return self.mv(path1, path2, **kwargs)

    def delete(self, path, recursive=False, maxdepth=None):
        """Alias of `AbstractFileSystem.rm`."""
        return self.rm(path, recursive=recursive, maxdepth=maxdepth)

    def upload(self, lpath, rpath, recursive=False, **kwargs):
        """Alias of `AbstractFileSystem.put`."""
        return self.put(lpath, rpath, recursive=recursive, **kwargs)

    def download(self, rpath, lpath, recursive=False, **kwargs):
        """Alias of `AbstractFileSystem.get`."""
        return self.get(rpath, lpath, recursive=recursive, **kwargs)

    def sign(self, path, expiration=100, **kwargs):
        """Create a signed URL representing the given path

        Some implementations allow temporary URLs to be generated, as a
        way of delegating credentials.

        Parameters
        ----------
        path : str
            The path on the filesystem
        expiration : int
            Number of seconds to enable the URL for (if supported)

        Returns
        -------
        URL : str
            The signed URL

        Raises
        ------
        NotImplementedError : if method is not implemented for a filesystem
        """
        raise NotImplementedError("Sign is not implemented for this filesystem")

    def _isfilestore(self):
        # Originally inherited from pyarrow DaskFileSystem. Keeping this
        # here for backwards compatibility as long as pyarrow uses its
        # legacy fsspec-compatible filesystems and thus accepts fsspec
        # filesystems as well
        return False


class AbstractBufferedFile(io.IOBase):
    """Convenient class to derive from to provide buffering

    In the case that the backend does not provide a pythonic file-like object
    already, this class contains much of the logic to build one. The only
    methods that need to be overridden are ``_upload_chunk``,
    ``_initiate_upload`` and ``_fetch_range``.
    """

    DEFAULT_BLOCK_SIZE = 5 * 2**20
    _details = None

    def __init__(
        self,
        fs,
        path,
        mode="rb",
        block_size="default",
        autocommit=True,
        cache_type="readahead",
        cache_options=None,
        size=None,
        **kwargs,
    ):
        """
        Template for files with buffered reading and writing

        Parameters
        ----------
        fs: instance of FileSystem
        path: str
            location in file-system
        mode: str
            Normal file modes. Currently only 'wb', 'ab' or 'rb'. Some file
            systems may be read-only, and some may not support append.
        block_size: int
            Buffer size for reading or writing, 'default' for class default
        autocommit: bool
            Whether to write to final destination; may only impact what
            happens when file is being closed.
        cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
            Caching policy in read mode. See the definitions in ``core``.
        cache_options : dict
            Additional options passed to the constructor for the cache specified
            by `cache_type`.
        size: int
            If given and in read mode, suppresses having to look up the file size
        kwargs:
            Gets stored as self.kwargs
        """
        from .core import caches

        self.path = path
        self.fs = fs
        self.mode = mode
        self.blocksize = (
            self.DEFAULT_BLOCK_SIZE if block_size in ["default", None] else block_size
        )
        self.loc = 0
        self.autocommit = autocommit
        self.end = None
        self.start = None
        self.closed = False

        if cache_options is None:
            cache_options = {}

        if "trim" in kwargs:
            warnings.warn(
                "Passing 'trim' to control the cache behavior has been deprecated. "
                "Specify it within the 'cache_options' argument instead.",
                FutureWarning,
            )
            cache_options["trim"] = kwargs.pop("trim")

        self.kwargs = kwargs

        if mode not in {"ab", "rb", "wb"}:
            raise NotImplementedError("File mode not supported")
        if mode == "rb":
            if size is not None:
                self.size = size
            else:
                self.size = self.details["size"]
            self.cache = caches[cache_type](
                self.blocksize, self._fetch_range, self.size, **cache_options
            )
        else:
            self.buffer = io.BytesIO()
            self.offset = None
            self.forced = False
            self.location = None

    @property
    def details(self):
        if self._details is None:
            self._details = self.fs.info(self.path)
        return self._details

    @details.setter
    def details(self, value):
        self._details = value
        self.size = value["size"]

    @property
    def full_name(self):
        return _unstrip_protocol(self.path, self.fs)

    @property
    def closed(self):
        # get around this attr being read-only in IOBase
        # use getattr here, since this can be called during del
        return getattr(self, "_closed", True)

    @closed.setter
    def closed(self, c):
        self._closed = c

    def __hash__(self):
        if "w" in self.mode:
            return id(self)
        else:
            return int(tokenize(self.details), 16)

    def __eq__(self, other):
        """Files are equal if they have the same checksum, only in read mode"""
        return self.mode == "rb" and other.mode == "rb" and hash(self) == hash(other)

    def commit(self):
        """Move from temp to final destination"""

    def discard(self):
        """Throw away temporary file"""

    def info(self):
        """File information about this path"""
        if "r" in self.mode:
            return self.details
        else:
            raise ValueError("Info not available while writing")

    def tell(self):
        """Current file location"""
        return self.loc

    def seek(self, loc, whence=0):
        """Set current file location

        Parameters
        ----------
        loc: int
            byte location
        whence: {0, 1, 2}
            from start of file, current location or end of file, resp.
        """
        loc = int(loc)
        if not self.mode == "rb":
            raise OSError(ESPIPE, "Seek only available in read mode")
        if whence == 0:
            nloc = loc
        elif whence == 1:
            nloc = self.loc + loc
        elif whence == 2:
            nloc = self.size + loc
        else:
            raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)")
        if nloc < 0:
            raise ValueError("Seek before start of file")
        self.loc = nloc
        return self.loc

    def write(self, data):
        """
        Write data to buffer.

        Buffer only sent on flush() or if buffer is greater than
        or equal to blocksize.

        Parameters
        ----------
        data: bytes
            Set of bytes to be written.
        """
        if self.mode not in {"wb", "ab"}:
            raise ValueError("File not in write mode")
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if self.forced:
            raise ValueError("This file has been force-flushed, can only close")
        out = self.buffer.write(data)
        self.loc += out
        if self.buffer.tell() >= self.blocksize:
            self.flush()
        return out

    def flush(self, force=False):
        """
        Write buffered data to backend store.

        Writes the current buffer, if it is larger than the block-size, or if
        the file is being closed.

        Parameters
        ----------
        force: bool
            When closing, write the last block even if it is smaller than
            blocks are allowed to be. Disallows further writing to this file.
        """

        if self.closed:
            raise ValueError("Flush on closed file")
        if force and self.forced:
            raise ValueError("Force flush cannot be called more than once")
        if force:
            self.forced = True

        if self.mode not in {"wb", "ab"}:
            # no-op to flush on read-mode
            return

        if not force and self.buffer.tell() < self.blocksize:
            # Defer write on small block
            return

        if self.offset is None:
            # Initialize a multipart upload
            self.offset = 0
            try:
                self._initiate_upload()
            except:  # noqa: E722
                self.closed = True
                raise

        if self._upload_chunk(final=force) is not False:
            self.offset += self.buffer.seek(0, 2)
            self.buffer = io.BytesIO()

    def _upload_chunk(self, final=False):
        """Write one part of a multi-block file upload

        Parameters
        ----------
        final: bool
            This is the last block, so should complete file, if
            self.autocommit is True.
        """
        # may not yet have been initialized, may need to call _initiate_upload

    def _initiate_upload(self):
        """Create remote file/upload"""
        pass

    def _fetch_range(self, start, end):
        """Get the specified set of bytes from remote"""
        raise NotImplementedError
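
    # Added commentary (not part of fsspec): a minimal subclass sketch; only
    # the three hooks above need overriding, everything else is inherited.
    # The ``RamFile`` class and its backing store are hypothetical:
    #
    #     class RamFile(AbstractBufferedFile):
    #         def _initiate_upload(self):
    #             self.parts = []                            # start a new "upload"
    #         def _upload_chunk(self, final=False):
    #             self.parts.append(self.buffer.getvalue())  # ship one block
    #         def _fetch_range(self, start, end):
    #             return b"".join(self.parts)[start:end]     # serve a byte range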

    def read(self, length=-1):
        """
        Return data from cache, or fetch pieces as necessary

        Parameters
        ----------
        length: int (-1)
            Number of bytes to read; if <0, all remaining bytes.
        """
        length = -1 if length is None else int(length)
        if self.mode != "rb":
            raise ValueError("File not in read mode")
        if length < 0:
            length = self.size - self.loc
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        logger.debug("%s read: %i - %i", self, self.loc, self.loc + length)
        if length == 0:
            # don't even bother calling fetch
            return b""
        out = self.cache._fetch(self.loc, self.loc + length)
        self.loc += len(out)
        return out

    def readinto(self, b):
        """mirrors builtin file's readinto method

        https://docs.python.org/3/library/io.html#io.RawIOBase.readinto
        """
        out = memoryview(b).cast("B")
        data = self.read(out.nbytes)
        out[: len(data)] = data
        return len(data)

    def readuntil(self, char=b"\n", blocks=None):
        """Return data between current position and first occurrence of char

        char is included in the output, except if the end of the file is
        encountered first.

        Parameters
        ----------
        char: bytes
            Thing to find
        blocks: None or int
            How much to read in each go. Defaults to file blocksize - which may
            mean a new read on every call.
        """
        out = []
        while True:
            start = self.tell()
            part = self.read(blocks or self.blocksize)
            if len(part) == 0:
                break
            found = part.find(char)
            if found > -1:
                out.append(part[: found + len(char)])
                self.seek(start + found + len(char))
                break
            out.append(part)
        return b"".join(out)
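
    # Added commentary (not part of fsspec):
    #
    #     f.readuntil(b",")   # bytes up to and including the first comma
    #     f.readline()        # same as f.readuntil(b"\n"), defined below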

    def readline(self):
        """Read until first occurrence of newline character

        Note that, because of character encoding, this is not necessarily a
        true line ending.
        """
        return self.readuntil(b"\n")

    def __next__(self):
        out = self.readline()
        if out:
            return out
        raise StopIteration

    def __iter__(self):
        return self

    def readlines(self):
        """Return all data, split by the newline character"""
        data = self.read()
        lines = data.split(b"\n")
        out = [l + b"\n" for l in lines[:-1]]
        if data.endswith(b"\n"):
            return out
        else:
            return out + [lines[-1]]
        # return list(self) ???

    def readinto1(self, b):
        return self.readinto(b)

    def close(self):
        """Close file

        Finalizes writes, discards cache
        """
        if getattr(self, "_unclosable", False):
            return
        if self.closed:
            return
        if self.mode == "rb":
            self.cache = None
        else:
            if not self.forced:
                self.flush(force=True)

            if self.fs is not None:
                self.fs.invalidate_cache(self.path)
                self.fs.invalidate_cache(self.fs._parent(self.path))

        self.closed = True

    def readable(self):
        """Whether opened for reading"""
        return self.mode == "rb" and not self.closed

    def seekable(self):
        """Whether the file is seekable (only in read mode)"""
        return self.readable()

    def writable(self):
        """Whether opened for writing"""
        return self.mode in {"wb", "ab"} and not self.closed

    def __del__(self):
        if not self.closed:
            self.close()

    def __str__(self):
        return f"<File-like object {type(self.fs).__name__}, {self.path}>"

    __repr__ = __str__

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
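
A note on how the pieces above fit together (added commentary, not part of the committed file): a concrete backend subclasses AbstractFileSystem and overrides a few primitives; the bulk operations (cat/get/put/copy/rm and friends) then come for free. A minimal sketch under stated assumptions, using only public fsspec APIs; the protocol name "dictfs" and the class itself are hypothetical, not something the package ships:

    import fsspec
    from fsspec.spec import AbstractFileSystem


    class DictFileSystem(AbstractFileSystem):
        # Toy backend holding path -> bytes in a plain dict (illustrative only).
        protocol = "dictfs"  # hypothetical protocol name

        def __init__(self, **storage_options):
            super().__init__(**storage_options)
            self.store = {}

        def ls(self, path, detail=True, **kwargs):
            path = self._strip_protocol(path).rstrip("/")
            out = [
                {"name": name, "size": len(data), "type": "file"}
                for name, data in self.store.items()
                if path == "" or name == path or name.startswith(path + "/")
            ]
            return out if detail else [o["name"] for o in out]

        def cat_file(self, path, start=None, end=None, **kwargs):
            return self.store[self._strip_protocol(path)][start:end]

        def pipe_file(self, path, value, **kwargs):
            self.store[self._strip_protocol(path)] = bytes(value)


    fsspec.register_implementation("dictfs", DictFileSystem)
    fs = fsspec.filesystem("dictfs")
    fs.pipe_file("folder/a.bin", b"hello")
    assert fs.cat_file("folder/a.bin") == b"hello"
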
lib/python3.11/site-packages/fsspec/tests/abstract/__init__.py
ADDED
@@ -0,0 +1,287 @@
import os
from hashlib import md5

import pytest

from fsspec.implementations.local import LocalFileSystem
from fsspec.tests.abstract.copy import AbstractCopyTests  # noqa
from fsspec.tests.abstract.get import AbstractGetTests  # noqa
from fsspec.tests.abstract.put import AbstractPutTests  # noqa


class BaseAbstractFixtures:
    """
    Abstract base class containing fixtures that are used by, but never need to
    be overridden in, derived filesystem-specific classes to run the abstract
    tests on such filesystems.
    """

    @pytest.fixture
    def fs_bulk_operations_scenario_0(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used for many cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._bulk_operations_scenario_0(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_glob_edge_cases_files(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used for glob edge cases cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._glob_edge_cases_files(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_dir_and_file_with_same_name_prefix(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used to check cp/get/put on directory
        and file with the same name prefixes.

        Cleans up at the end of each test in which it is used.
        """
        source = self._dir_and_file_with_same_name_prefix(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_10_files_with_hashed_names(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used to check cp/get/put files order
        when source and destination are lists.

        Cleans up at the end of each test in which it is used.
        """
        source = self._10_files_with_hashed_names(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_target(self, fs, fs_join, fs_path):
        """
        Return name of remote directory that does not yet exist to copy into.

        Cleans up at the end of each test in which it is used.
        """
        target = fs_join(fs_path, "target")
        yield target
        if fs.exists(target):
            fs.rm(target, recursive=True)

    @pytest.fixture
    def local_bulk_operations_scenario_0(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used for many cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._bulk_operations_scenario_0(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_glob_edge_cases_files(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used for glob edge cases cp/get/put tests.

        Cleans up at the end of each test in which it is used.
        """
        source = self._glob_edge_cases_files(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_dir_and_file_with_same_name_prefix(
        self, local_fs, local_join, local_path
    ):
        """
        Scenario on local filesystem that is used to check cp/get/put on directory
        and file with the same name prefixes.

        Cleans up at the end of each test in which it is used.
        """
        source = self._dir_and_file_with_same_name_prefix(
            local_fs, local_join, local_path
        )
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_10_files_with_hashed_names(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used to check cp/get/put files order
        when source and destination are lists.

        Cleans up at the end of each test in which it is used.
        """
        source = self._10_files_with_hashed_names(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_target(self, local_fs, local_join, local_path):
        """
        Return name of local directory that does not yet exist to copy into.

        Cleans up at the end of each test in which it is used.
        """
        target = local_join(local_path, "target")
        yield target
        if local_fs.exists(target):
            local_fs.rm(target, recursive=True)

    def _glob_edge_cases_files(self, some_fs, some_join, some_path):
        """
        Scenario that is used for glob edge cases cp/get/put tests.
        Creates the following directory and file structure:

        📁 source
        ├── 📄 file1
        ├── 📄 file2
        ├── 📁 subdir0
        │   ├── 📄 subfile1
        │   ├── 📄 subfile2
        │   └── 📁 nesteddir
        │       └── 📄 nestedfile
        └── 📁 subdir1
            ├── 📄 subfile1
            ├── 📄 subfile2
            └── 📁 nesteddir
                └── 📄 nestedfile
        """
        source = some_join(some_path, "source")
        some_fs.touch(some_join(source, "file1"))
        some_fs.touch(some_join(source, "file2"))

        for subdir_idx in range(2):
            subdir = some_join(source, f"subdir{subdir_idx}")
            nesteddir = some_join(subdir, "nesteddir")
            some_fs.makedirs(nesteddir)
            some_fs.touch(some_join(subdir, "subfile1"))
            some_fs.touch(some_join(subdir, "subfile2"))
            some_fs.touch(some_join(nesteddir, "nestedfile"))

        return source

    def _bulk_operations_scenario_0(self, some_fs, some_join, some_path):
        """
        Scenario that is used for many cp/get/put tests. Creates the following
        directory and file structure:

        📁 source
        ├── 📄 file1
        ├── 📄 file2
        └── 📁 subdir
            ├── 📄 subfile1
            ├── 📄 subfile2
            └── 📁 nesteddir
                └── 📄 nestedfile
        """
        source = some_join(some_path, "source")
        subdir = some_join(source, "subdir")
        nesteddir = some_join(subdir, "nesteddir")
        some_fs.makedirs(nesteddir)
        some_fs.touch(some_join(source, "file1"))
        some_fs.touch(some_join(source, "file2"))
        some_fs.touch(some_join(subdir, "subfile1"))
        some_fs.touch(some_join(subdir, "subfile2"))
        some_fs.touch(some_join(nesteddir, "nestedfile"))
        return source

    def _dir_and_file_with_same_name_prefix(self, some_fs, some_join, some_path):
        """
        Scenario that is used to check cp/get/put on directory and file with
        the same name prefixes. Creates the following directory and file structure:

        📁 source
        ├── 📄 subdir.txt
        └── 📁 subdir
            └── 📄 subfile.txt
        """
        source = some_join(some_path, "source")
        subdir = some_join(source, "subdir")
        file = some_join(source, "subdir.txt")
        subfile = some_join(subdir, "subfile.txt")
        some_fs.makedirs(subdir)
        some_fs.touch(file)
        some_fs.touch(subfile)
        return source

    def _10_files_with_hashed_names(self, some_fs, some_join, some_path):
        """
        Scenario that is used to check cp/get/put files order when source and
        destination are lists. Creates the following directory and file structure:

        📁 source
        └── 📄 {hashed([0-9])}.txt
        """
        source = some_join(some_path, "source")
        for i in range(10):
            hashed_i = md5(str(i).encode("utf-8")).hexdigest()
            path = some_join(source, f"{hashed_i}.txt")
            some_fs.pipe(path=path, value=f"{i}".encode("utf-8"))
        return source


class AbstractFixtures(BaseAbstractFixtures):
    """
    Abstract base class containing fixtures that may be overridden in derived
    filesystem-specific classes to run the abstract tests on such filesystems.

    For any particular filesystem some of these fixtures must be overridden,
    such as ``fs`` and ``fs_path``, and others may be overridden if the
    default functions here are not appropriate, such as ``fs_join``.
    """

    @pytest.fixture
    def fs(self):
        raise NotImplementedError("This function must be overridden in derived classes")

    @pytest.fixture
    def fs_join(self):
        """
        Return a function that joins its arguments together into a path.

        Most fsspec implementations join paths in a platform-dependent way,
        but some will override this to always use a forward slash.
        """
        return os.path.join

    @pytest.fixture
    def fs_path(self):
        raise NotImplementedError("This function must be overridden in derived classes")

    @pytest.fixture(scope="class")
    def local_fs(self):
        # Maybe need an option for auto_mkdir=False? This is only relevant
        # for certain implementations.
        return LocalFileSystem(auto_mkdir=True)

    @pytest.fixture
    def local_join(self):
        """
        Return a function that joins its arguments together into a path, on
        the local filesystem.
        """
        return os.path.join

    @pytest.fixture
    def local_path(self, tmpdir):
        return tmpdir

    @pytest.fixture
    def supports_empty_directories(self):
        """
        Return whether this implementation supports empty directories.
        """
        return True

    @pytest.fixture
    def fs_sanitize_path(self):
        return lambda x: x
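
The fixtures above are meant to be combined with the AbstractCopyTests/AbstractGetTests/AbstractPutTests suites. A minimal sketch of wiring them up for a concrete filesystem (the memory filesystem here, and the class names, are illustrative assumptions; fsspec's own test suites do something similar for the backends it ships):

    import pytest

    import fsspec
    from fsspec.tests.abstract import AbstractFixtures
    from fsspec.tests.abstract.copy import AbstractCopyTests


    class MemoryFixtures(AbstractFixtures):
        @pytest.fixture
        def fs(self):
            return fsspec.filesystem("memory")

        @pytest.fixture
        def fs_path(self):
            return "/abstract-tests"

        @pytest.fixture
        def fs_join(self):
            # The memory filesystem always joins with forward slashes.
            return lambda *parts: "/".join(parts)


    class TestMemoryCopy(AbstractCopyTests, MemoryFixtures):
        pass
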
lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (15 kB).

lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/common.cpython-311.pyc
ADDED
Binary file (2.33 kB).

lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/copy.cpython-311.pyc
ADDED
Binary file (26.5 kB).

lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/get.cpython-311.pyc
ADDED
Binary file (26.3 kB).

lib/python3.11/site-packages/fsspec/tests/abstract/__pycache__/put.cpython-311.pyc
ADDED
Binary file (27.7 kB).
lib/python3.11/site-packages/fsspec/tests/abstract/common.py
ADDED
@@ -0,0 +1,175 @@
GLOB_EDGE_CASES_TESTS = {
    "argnames": ("path", "recursive", "maxdepth", "expected"),
    "argvalues": [
        ("fil?1", False, None, ["file1"]),
        ("fil?1", True, None, ["file1"]),
        ("file[1-2]", False, None, ["file1", "file2"]),
        ("file[1-2]", True, None, ["file1", "file2"]),
        ("*", False, None, ["file1", "file2"]),
        (
            "*",
            True,
            None,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("*", True, 1, ["file1", "file2"]),
        (
            "*",
            True,
            2,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
        ("*1", False, None, ["file1"]),
        (
            "*1",
            True,
            None,
            [
                "file1",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]),
        (
            "**",
            False,
            None,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "**",
            True,
            None,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("**", True, 1, ["file1", "file2"]),
        (
            "**",
            True,
            2,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "**",
            False,
            2,
            [
                "file1",
                "file2",
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
        ("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
        (
            "**/*1",
            True,
            None,
            [
                "file1",
                "subdir0/subfile1",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        ("**/*1", True, 1, ["file1"]),
        (
            "**/*1",
            True,
            2,
            ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"],
        ),
        ("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
        ("**/subdir0", False, None, []),
        ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
        ("**/subdir0/nested*", False, 2, []),
        ("**/subdir0/nested*", True, 2, ["nestedfile"]),
        ("subdir[1-2]", False, None, []),
        ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
        ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]),
        ("subdir[0-1]", False, None, []),
        (
            "subdir[0-1]",
            True,
            None,
            [
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir0/nesteddir/nestedfile",
                "subdir1/subfile1",
                "subdir1/subfile2",
                "subdir1/nesteddir/nestedfile",
            ],
        ),
        (
            "subdir[0-1]/*fil[e]*",
            False,
            None,
            [
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
        (
            "subdir[0-1]/*fil[e]*",
            True,
            None,
            [
                "subdir0/subfile1",
                "subdir0/subfile2",
                "subdir1/subfile1",
                "subdir1/subfile2",
            ],
        ),
    ],
}
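
GLOB_EDGE_CASES_TESTS is shaped for direct use with ``pytest.mark.parametrize``; a sketch of how a test can consume it (the test body here is an illustrative assumption, not the suites' actual assertions):

    import pytest

    from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS


    @pytest.mark.parametrize(**GLOB_EDGE_CASES_TESTS)
    def test_glob_expectations(path, recursive, maxdepth, expected):
        # Each case names a glob pattern relative to the "source" tree and the
        # relative paths a cp/get/put of that pattern should produce.
        assert isinstance(expected, list)
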
lib/python3.11/site-packages/fsspec/tests/abstract/copy.py
ADDED
@@ -0,0 +1,543 @@
from hashlib import md5
from itertools import product

import pytest

from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS


class AbstractCopyTests:
    def test_copy_file_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        fs_target,
        supports_empty_directories,
    ):
        # Copy scenario 1a
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            fs.touch(fs_join(target, "dummy"))
        assert fs.isdir(target)

        target_file2 = fs_join(target, "file2")
        target_subfile1 = fs_join(target, "subfile1")

        # Copy from source directory
        fs.cp(fs_join(source, "file2"), target)
        assert fs.isfile(target_file2)

        # Copy from sub directory
        fs.cp(fs_join(source, "subdir", "subfile1"), target)
        assert fs.isfile(target_subfile1)

        # Remove copied files
        fs.rm([target_file2, target_subfile1])
        assert not fs.exists(target_file2)
        assert not fs.exists(target_subfile1)

        # Repeat with trailing slash on target
        fs.cp(fs_join(source, "file2"), target + "/")
        assert fs.isdir(target)
        assert fs.isfile(target_file2)

        fs.cp(fs_join(source, "subdir", "subfile1"), target + "/")
        assert fs.isfile(target_subfile1)

    def test_copy_file_to_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        # Copy scenario 1b
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        fs.cp(
            fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
        )  # Note trailing slash
        assert fs.isdir(target)
        assert fs.isdir(fs_join(target, "newdir"))
        assert fs.isfile(fs_join(target, "newdir", "subfile1"))

    def test_copy_file_to_file_in_existing_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        fs_target,
        supports_empty_directories,
    ):
        # Copy scenario 1c
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            fs.touch(fs_join(target, "dummy"))
        assert fs.isdir(target)

        fs.cp(fs_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
        assert fs.isfile(fs_join(target, "newfile"))

    def test_copy_file_to_file_in_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        # Copy scenario 1d
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        fs.cp(
            fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir", "newfile")
        )
        assert fs.isdir(fs_join(target, "newdir"))
        assert fs.isfile(fs_join(target, "newdir", "newfile"))

    def test_copy_directory_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        fs_target,
        supports_empty_directories,
    ):
        # Copy scenario 1e
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            dummy = fs_join(target, "dummy")
            fs.touch(dummy)
        assert fs.isdir(target)

        for source_slash, target_slash in zip([False, True], [False, True]):
            s = fs_join(source, "subdir")
            if source_slash:
                s += "/"
            t = target + "/" if target_slash else target

            # Without recursive does nothing
            fs.cp(s, t)
            assert fs.ls(target) == ([] if supports_empty_directories else [dummy])

            # With recursive
            fs.cp(s, t, recursive=True)
            if source_slash:
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert fs.isdir(fs_join(target, "nesteddir"))
                assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [
                        fs_join(target, "subfile1"),
                        fs_join(target, "subfile2"),
                        fs_join(target, "nesteddir"),
                    ],
                    recursive=True,
                )
            else:
                assert fs.isdir(fs_join(target, "subdir"))
                assert fs.isfile(fs_join(target, "subdir", "subfile1"))
                assert fs.isfile(fs_join(target, "subdir", "subfile2"))
                assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
                assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))

                fs.rm(fs_join(target, "subdir"), recursive=True)
            assert fs.ls(target) == ([] if supports_empty_directories else [dummy])

            # Limit recursive by maxdepth
            fs.cp(s, t, recursive=True, maxdepth=1)
            if source_slash:
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert not fs.exists(fs_join(target, "nesteddir"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [
                        fs_join(target, "subfile1"),
                        fs_join(target, "subfile2"),
                    ],
                    recursive=True,
                )
            else:
                assert fs.isdir(fs_join(target, "subdir"))
                assert fs.isfile(fs_join(target, "subdir", "subfile1"))
                assert fs.isfile(fs_join(target, "subdir", "subfile2"))
                assert not fs.exists(fs_join(target, "subdir", "nesteddir"))

                fs.rm(fs_join(target, "subdir"), recursive=True)
            assert fs.ls(target) == ([] if supports_empty_directories else [dummy])

    def test_copy_directory_to_new_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        fs_target,
        supports_empty_directories,
    ):
        # Copy scenario 1f
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        for source_slash, target_slash in zip([False, True], [False, True]):
            s = fs_join(source, "subdir")
            if source_slash:
                s += "/"
            t = fs_join(target, "newdir")
            if target_slash:
                t += "/"

            # Without recursive does nothing
            fs.cp(s, t)
            if supports_empty_directories:
                assert fs.ls(target) == []
            else:
                with pytest.raises(FileNotFoundError):
                    fs.ls(target)

            # With recursive
            fs.cp(s, t, recursive=True)
            assert fs.isdir(fs_join(target, "newdir"))
            assert fs.isfile(fs_join(target, "newdir", "subfile1"))
            assert fs.isfile(fs_join(target, "newdir", "subfile2"))
            assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
            assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
            assert not fs.exists(fs_join(target, "subdir"))

            fs.rm(fs_join(target, "newdir"), recursive=True)
            assert not fs.exists(fs_join(target, "newdir"))

            # Limit recursive by maxdepth
            fs.cp(s, t, recursive=True, maxdepth=1)
            assert fs.isdir(fs_join(target, "newdir"))
            assert fs.isfile(fs_join(target, "newdir", "subfile1"))
            assert fs.isfile(fs_join(target, "newdir", "subfile2"))
            assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
            assert not fs.exists(fs_join(target, "subdir"))

            fs.rm(fs_join(target, "newdir"), recursive=True)
            assert not fs.exists(fs_join(target, "newdir"))

    def test_copy_glob_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        fs_target,
        supports_empty_directories,
    ):
        # Copy scenario 1g
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            dummy = fs_join(target, "dummy")
            fs.touch(dummy)
        assert fs.isdir(target)

        for target_slash in [False, True]:
            t = target + "/" if target_slash else target

            # Without recursive
            fs.cp(fs_join(source, "subdir", "*"), t)
            assert fs.isfile(fs_join(target, "subfile1"))
            assert fs.isfile(fs_join(target, "subfile2"))
            assert not fs.isdir(fs_join(target, "nesteddir"))
            assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
            assert not fs.exists(fs_join(target, "subdir"))

            fs.rm(
                [
                    fs_join(target, "subfile1"),
                    fs_join(target, "subfile2"),
                ],
                recursive=True,
            )
            assert fs.ls(target) == ([] if supports_empty_directories else [dummy])

            # With recursive
            for glob, recursive in zip(["*", "**"], [True, False]):
                fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert fs.isdir(fs_join(target, "nesteddir"))
                assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [
                        fs_join(target, "subfile1"),
                        fs_join(target, "subfile2"),
                        fs_join(target, "nesteddir"),
                    ],
                    recursive=True,
                )
                assert fs.ls(target) == ([] if supports_empty_directories else [dummy])

                # Limit recursive by maxdepth
                fs.cp(
                    fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
                )
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert not fs.exists(fs_join(target, "nesteddir"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [
                        fs_join(target, "subfile1"),
                        fs_join(target, "subfile2"),
                    ],
                    recursive=True,
                )
                assert fs.ls(target) == ([] if supports_empty_directories else [dummy])

    def test_copy_glob_to_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        # Copy scenario 1h
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        for target_slash in [False, True]:
            t = fs_join(target, "newdir")
            if target_slash:
                t += "/"

            # Without recursive
            fs.cp(fs_join(source, "subdir", "*"), t)
            assert fs.isdir(fs_join(target, "newdir"))
            assert fs.isfile(fs_join(target, "newdir", "subfile1"))
            assert fs.isfile(fs_join(target, "newdir", "subfile2"))
            assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
            assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
            assert not fs.exists(fs_join(target, "subdir"))
            assert not fs.exists(fs_join(target, "newdir", "subdir"))

            fs.rm(fs_join(target, "newdir"), recursive=True)
            assert not fs.exists(fs_join(target, "newdir"))

            # With recursive
            for glob, recursive in zip(["*", "**"], [True, False]):
                fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
                assert fs.isdir(fs_join(target, "newdir"))
                assert fs.isfile(fs_join(target, "newdir", "subfile1"))
                assert fs.isfile(fs_join(target, "newdir", "subfile2"))
                assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
                assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
                assert not fs.exists(fs_join(target, "subdir"))
                assert not fs.exists(fs_join(target, "newdir", "subdir"))

                fs.rm(fs_join(target, "newdir"), recursive=True)
                assert not fs.exists(fs_join(target, "newdir"))

                # Limit recursive by maxdepth
                fs.cp(
                    fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
                )
                assert fs.isdir(fs_join(target, "newdir"))
                assert fs.isfile(fs_join(target, "newdir", "subfile1"))
                assert fs.isfile(fs_join(target, "newdir", "subfile2"))
                assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
                assert not fs.exists(fs_join(target, "subdir"))
                assert not fs.exists(fs_join(target, "newdir", "subdir"))

                fs.rm(fs_join(target, "newdir"), recursive=True)
                assert not fs.exists(fs_join(target, "newdir"))

    @pytest.mark.parametrize(
        GLOB_EDGE_CASES_TESTS["argnames"],
        GLOB_EDGE_CASES_TESTS["argvalues"],
    )
    def test_copy_glob_edge_cases(
        self,
        path,
        recursive,
        maxdepth,
        expected,
        fs,
        fs_join,
        fs_glob_edge_cases_files,
        fs_target,
        fs_sanitize_path,
    ):
        # Copy scenario 1g
        source = fs_glob_edge_cases_files

        target = fs_target

        for new_dir, target_slash in product([True, False], [True, False]):
            fs.mkdir(target)

            t = fs_join(target, "newdir") if new_dir else target
            t = t + "/" if target_slash else t

            fs.copy(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)

            output = fs.find(target)
            if new_dir:
                prefixed_expected = [
                    fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
                ]
            else:
                prefixed_expected = [
                    fs_sanitize_path(fs_join(target, p)) for p in expected
                ]
            assert sorted(output) == sorted(prefixed_expected)

            try:
                fs.rm(target, recursive=True)
            except FileNotFoundError:
                pass

    def test_copy_list_of_files_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        fs_target,
        supports_empty_directories,
    ):
        # Copy scenario 2a
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            dummy = fs_join(target, "dummy")
            fs.touch(dummy)
        assert fs.isdir(target)

        source_files = [
            fs_join(source, "file1"),
            fs_join(source, "file2"),
            fs_join(source, "subdir", "subfile1"),
        ]

        for target_slash in [False, True]:
            t = target + "/" if target_slash else target

            fs.cp(source_files, t)
            assert fs.isfile(fs_join(target, "file1"))
            assert fs.isfile(fs_join(target, "file2"))
            assert fs.isfile(fs_join(target, "subfile1"))

            fs.rm(
                [
                    fs_join(target, "file1"),
                    fs_join(target, "file2"),
                    fs_join(target, "subfile1"),
                ],
                recursive=True,
            )
            assert fs.ls(target) == ([] if supports_empty_directories else [dummy])

    def test_copy_list_of_files_to_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        # Copy scenario 2b
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        source_files = [
            fs_join(source, "file1"),
            fs_join(source, "file2"),
            fs_join(source, "subdir", "subfile1"),
        ]

        fs.cp(source_files, fs_join(target, "newdir") + "/")  # Note trailing slash
        assert fs.isdir(fs_join(target, "newdir"))
        assert fs.isfile(fs_join(target, "newdir", "file1"))
        assert fs.isfile(fs_join(target, "newdir", "file2"))
        assert fs.isfile(fs_join(target, "newdir", "subfile1"))

    def test_copy_two_files_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        # This is a duplicate of test_copy_list_of_files_to_new_directory and
        # can eventually be removed.
        source = fs_bulk_operations_scenario_0

        target = fs_target
        assert not fs.exists(target)
        fs.cp([fs_join(source, "file1"), fs_join(source, "file2")], target)

        assert fs.isdir(target)
        assert fs.isfile(fs_join(target, "file1"))
        assert fs.isfile(fs_join(target, "file2"))

    def test_copy_directory_without_files_with_same_name_prefix(
        self,
        fs,
        fs_join,
        fs_target,
        fs_dir_and_file_with_same_name_prefix,
        supports_empty_directories,
    ):
        # Create the test dirs
        source = fs_dir_and_file_with_same_name_prefix
        target = fs_target

        # Test without glob
        fs.cp(fs_join(source, "subdir"), target, recursive=True)

        assert fs.isfile(fs_join(target, "subfile.txt"))
        assert not fs.isfile(fs_join(target, "subdir.txt"))

        fs.rm([fs_join(target, "subfile.txt")])
        if supports_empty_directories:
            assert fs.ls(target) == []
        else:
            assert not fs.exists(target)

        # Test with glob
        fs.cp(fs_join(source, "subdir*"), target, recursive=True)

        assert fs.isdir(fs_join(target, "subdir"))
        assert fs.isfile(fs_join(target, "subdir", "subfile.txt"))
        assert fs.isfile(fs_join(target, "subdir.txt"))

    def test_copy_with_source_and_destination_as_list(
        self, fs, fs_target, fs_join, fs_10_files_with_hashed_names
    ):
        # Create the test dir
        source = fs_10_files_with_hashed_names
        target = fs_target

        # Create list of files for source and destination
        source_files = []
        destination_files = []
        for i in range(10):
            hashed_i = md5(str(i).encode("utf-8")).hexdigest()
            source_files.append(fs_join(source, f"{hashed_i}.txt"))
            destination_files.append(fs_join(target, f"{hashed_i}.txt"))

        # Copy and assert order was kept
        fs.copy(path1=source_files, path2=destination_files)

        for i in range(10):
            file_content = fs.cat(destination_files[i]).decode("utf-8")
            assert file_content == str(i)
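AbstractCopyTests above defines no fixtures of its own; a backend opts in by mixing it into a concrete test class whose conftest supplies fs, fs_join, fs_target, the scenario fixtures, and supports_empty_directories (in fsspec these come from fsspec.tests.abstract.AbstractFixtures, added elsewhere in this commit). The following is a rough, hypothetical sketch of that wiring for the in-memory backend; the class and fixture bodies here are illustrative stand-ins, not part of this diff:

import pytest

import fsspec
from fsspec.tests.abstract.copy import AbstractCopyTests


class TestMemoryCopy(AbstractCopyTests):
    # Illustrative only: real suites inherit these (plus scenario fixtures
    # such as fs_bulk_operations_scenario_0) from AbstractFixtures.
    @pytest.fixture
    def fs(self):
        return fsspec.filesystem("memory")

    @pytest.fixture
    def fs_join(self):
        return lambda *parts: "/".join(parts)

    @pytest.fixture
    def fs_target(self, fs_join):
        return fs_join("", "target")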
lib/python3.11/site-packages/fsspec/tests/abstract/get.py
ADDED
@@ -0,0 +1,587 @@
from hashlib import md5
from itertools import product

import pytest

from fsspec.implementations.local import make_path_posix
from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS


class AbstractGetTests:
    def test_get_file_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        local_fs,
        local_join,
        local_target,
    ):
        # Copy scenario 1a
        source = fs_bulk_operations_scenario_0

        target = local_target
        local_fs.mkdir(target)
        assert local_fs.isdir(target)

        target_file2 = local_join(target, "file2")
        target_subfile1 = local_join(target, "subfile1")

        # Copy from source directory
        fs.get(fs_join(source, "file2"), target)
        assert local_fs.isfile(target_file2)

        # Copy from sub directory
        fs.get(fs_join(source, "subdir", "subfile1"), target)
        assert local_fs.isfile(target_subfile1)

        # Remove copied files
        local_fs.rm([target_file2, target_subfile1])
        assert not local_fs.exists(target_file2)
        assert not local_fs.exists(target_subfile1)

        # Repeat with trailing slash on target
        fs.get(fs_join(source, "file2"), target + "/")
        assert local_fs.isdir(target)
        assert local_fs.isfile(target_file2)

        fs.get(fs_join(source, "subdir", "subfile1"), target + "/")
        assert local_fs.isfile(target_subfile1)

    def test_get_file_to_new_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        local_fs,
        local_join,
        local_target,
    ):
        # Copy scenario 1b
        source = fs_bulk_operations_scenario_0

        target = local_target
        local_fs.mkdir(target)

        fs.get(
            fs_join(source, "subdir", "subfile1"), local_join(target, "newdir/")
        )  # Note trailing slash

        assert local_fs.isdir(target)
        assert local_fs.isdir(local_join(target, "newdir"))
        assert local_fs.isfile(local_join(target, "newdir", "subfile1"))

    def test_get_file_to_file_in_existing_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        local_fs,
        local_join,
        local_target,
    ):
        # Copy scenario 1c
        source = fs_bulk_operations_scenario_0

        target = local_target
        local_fs.mkdir(target)

        fs.get(fs_join(source, "subdir", "subfile1"), local_join(target, "newfile"))
        assert local_fs.isfile(local_join(target, "newfile"))

    def test_get_file_to_file_in_new_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        local_fs,
        local_join,
        local_target,
    ):
        # Copy scenario 1d
        source = fs_bulk_operations_scenario_0

        target = local_target
        local_fs.mkdir(target)

        fs.get(
            fs_join(source, "subdir", "subfile1"),
            local_join(target, "newdir", "newfile"),
        )
        assert local_fs.isdir(local_join(target, "newdir"))
        assert local_fs.isfile(local_join(target, "newdir", "newfile"))

    def test_get_directory_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        local_fs,
        local_join,
        local_target,
    ):
        # Copy scenario 1e
        source = fs_bulk_operations_scenario_0

        target = local_target
        local_fs.mkdir(target)
        assert local_fs.isdir(target)

        for source_slash, target_slash in zip([False, True], [False, True]):
            s = fs_join(source, "subdir")
            if source_slash:
                s += "/"
            t = target + "/" if target_slash else target

            # Without recursive does nothing
            fs.get(s, t)
            assert local_fs.ls(target) == []

            # With recursive
            fs.get(s, t, recursive=True)
            if source_slash:
                assert local_fs.isfile(local_join(target, "subfile1"))
                assert local_fs.isfile(local_join(target, "subfile2"))
                assert local_fs.isdir(local_join(target, "nesteddir"))
                assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
                assert not local_fs.exists(local_join(target, "subdir"))

                local_fs.rm(
                    [
                        local_join(target, "subfile1"),
                        local_join(target, "subfile2"),
                        local_join(target, "nesteddir"),
                    ],
                    recursive=True,
                )
            else:
                assert local_fs.isdir(local_join(target, "subdir"))
                assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
                assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
                assert local_fs.isdir(local_join(target, "subdir", "nesteddir"))
                assert local_fs.isfile(
                    local_join(target, "subdir", "nesteddir", "nestedfile")
                )

                local_fs.rm(local_join(target, "subdir"), recursive=True)
            assert local_fs.ls(target) == []

            # Limit recursive by maxdepth
            fs.get(s, t, recursive=True, maxdepth=1)
            if source_slash:
                assert local_fs.isfile(local_join(target, "subfile1"))
                assert local_fs.isfile(local_join(target, "subfile2"))
                assert not local_fs.exists(local_join(target, "nesteddir"))
                assert not local_fs.exists(local_join(target, "subdir"))

                local_fs.rm(
                    [
                        local_join(target, "subfile1"),
                        local_join(target, "subfile2"),
                    ],
                    recursive=True,
                )
            else:
                assert local_fs.isdir(local_join(target, "subdir"))
                assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
                assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
                assert not local_fs.exists(local_join(target, "subdir", "nesteddir"))

                local_fs.rm(local_join(target, "subdir"), recursive=True)
            assert local_fs.ls(target) == []

    def test_get_directory_to_new_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        local_fs,
        local_join,
        local_target,
    ):
        # Copy scenario 1f
        source = fs_bulk_operations_scenario_0

        target = local_target
        local_fs.mkdir(target)

        for source_slash, target_slash in zip([False, True], [False, True]):
            s = fs_join(source, "subdir")
            if source_slash:
                s += "/"
            t = local_join(target, "newdir")
            if target_slash:
                t += "/"

            # Without recursive does nothing
            fs.get(s, t)
            assert local_fs.ls(target) == []

            # With recursive
            fs.get(s, t, recursive=True)
            assert local_fs.isdir(local_join(target, "newdir"))
            assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
            assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
            assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
            assert local_fs.isfile(
                local_join(target, "newdir", "nesteddir", "nestedfile")
            )
            assert not local_fs.exists(local_join(target, "subdir"))

            local_fs.rm(local_join(target, "newdir"), recursive=True)
            assert local_fs.ls(target) == []

            # Limit recursive by maxdepth
            fs.get(s, t, recursive=True, maxdepth=1)
            assert local_fs.isdir(local_join(target, "newdir"))
            assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
            assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
            assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
            assert not local_fs.exists(local_join(target, "subdir"))

            local_fs.rm(local_join(target, "newdir"), recursive=True)
            assert not local_fs.exists(local_join(target, "newdir"))

    def test_get_glob_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        local_fs,
        local_join,
        local_target,
    ):
        # Copy scenario 1g
        source = fs_bulk_operations_scenario_0

        target = local_target
        local_fs.mkdir(target)

        for target_slash in [False, True]:
            t = target + "/" if target_slash else target

            # Without recursive
            fs.get(fs_join(source, "subdir", "*"), t)
            assert local_fs.isfile(local_join(target, "subfile1"))
            assert local_fs.isfile(local_join(target, "subfile2"))
            assert not local_fs.isdir(local_join(target, "nesteddir"))
            assert not local_fs.exists(local_join(target, "nesteddir", "nestedfile"))
            assert not local_fs.exists(local_join(target, "subdir"))

            local_fs.rm(
                [
                    local_join(target, "subfile1"),
                    local_join(target, "subfile2"),
                ],
                recursive=True,
            )
            assert local_fs.ls(target) == []

            # With recursive
            for glob, recursive in zip(["*", "**"], [True, False]):
                fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
                assert local_fs.isfile(local_join(target, "subfile1"))
                assert local_fs.isfile(local_join(target, "subfile2"))
                assert local_fs.isdir(local_join(target, "nesteddir"))
                assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
                assert not local_fs.exists(local_join(target, "subdir"))

                local_fs.rm(
                    [
                        local_join(target, "subfile1"),
                        local_join(target, "subfile2"),
                        local_join(target, "nesteddir"),
                    ],
                    recursive=True,
                )
                assert local_fs.ls(target) == []

                # Limit recursive by maxdepth
                fs.get(
                    fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
                )
                assert local_fs.isfile(local_join(target, "subfile1"))
                assert local_fs.isfile(local_join(target, "subfile2"))
                assert not local_fs.exists(local_join(target, "nesteddir"))
                assert not local_fs.exists(local_join(target, "subdir"))

                local_fs.rm(
                    [
                        local_join(target, "subfile1"),
                        local_join(target, "subfile2"),
                    ],
                    recursive=True,
                )
                assert local_fs.ls(target) == []

    def test_get_glob_to_new_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        local_fs,
        local_join,
        local_target,
    ):
        # Copy scenario 1h
        source = fs_bulk_operations_scenario_0

        target = local_target
        local_fs.mkdir(target)

        for target_slash in [False, True]:
            t = fs_join(target, "newdir")
            if target_slash:
                t += "/"

            # Without recursive
            fs.get(fs_join(source, "subdir", "*"), t)
            assert local_fs.isdir(local_join(target, "newdir"))
            assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
            assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
            assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
            assert not local_fs.exists(
                local_join(target, "newdir", "nesteddir", "nestedfile")
            )
            assert not local_fs.exists(local_join(target, "subdir"))
            assert not local_fs.exists(local_join(target, "newdir", "subdir"))

            local_fs.rm(local_join(target, "newdir"), recursive=True)
            assert local_fs.ls(target) == []

            # With recursive
            for glob, recursive in zip(["*", "**"], [True, False]):
                fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
                assert local_fs.isdir(local_join(target, "newdir"))
                assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
                assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
                assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
                assert local_fs.isfile(
                    local_join(target, "newdir", "nesteddir", "nestedfile")
                )
                assert not local_fs.exists(local_join(target, "subdir"))
                assert not local_fs.exists(local_join(target, "newdir", "subdir"))

                local_fs.rm(local_join(target, "newdir"), recursive=True)
                assert not local_fs.exists(local_join(target, "newdir"))

                # Limit recursive by maxdepth
                fs.get(
                    fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
                )
                assert local_fs.isdir(local_join(target, "newdir"))
                assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
                assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
                assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
                assert not local_fs.exists(local_join(target, "subdir"))
                assert not local_fs.exists(local_join(target, "newdir", "subdir"))

                local_fs.rm(local_fs.ls(target, detail=False), recursive=True)
                assert not local_fs.exists(local_join(target, "newdir"))

    @pytest.mark.parametrize(
        GLOB_EDGE_CASES_TESTS["argnames"],
        GLOB_EDGE_CASES_TESTS["argvalues"],
    )
    def test_get_glob_edge_cases(
        self,
        path,
        recursive,
        maxdepth,
        expected,
        fs,
        fs_join,
        fs_glob_edge_cases_files,
        local_fs,
        local_join,
        local_target,
    ):
        # Copy scenario 1g
        source = fs_glob_edge_cases_files

        target = local_target

        for new_dir, target_slash in product([True, False], [True, False]):
            local_fs.mkdir(target)

            t = local_join(target, "newdir") if new_dir else target
            t = t + "/" if target_slash else t

            fs.get(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)

            output = local_fs.find(target)
            if new_dir:
                prefixed_expected = [
                    make_path_posix(local_join(target, "newdir", p)) for p in expected
                ]
            else:
                prefixed_expected = [
                    make_path_posix(local_join(target, p)) for p in expected
                ]
            assert sorted(output) == sorted(prefixed_expected)

            try:
                local_fs.rm(target, recursive=True)
            except FileNotFoundError:
                pass

    def test_get_list_of_files_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        local_fs,
        local_join,
        local_target,
    ):
        # Copy scenario 2a
        source = fs_bulk_operations_scenario_0

        target = local_target
        local_fs.mkdir(target)

        source_files = [
            fs_join(source, "file1"),
            fs_join(source, "file2"),
            fs_join(source, "subdir", "subfile1"),
        ]

        for target_slash in [False, True]:
            t = target + "/" if target_slash else target

            fs.get(source_files, t)
            assert local_fs.isfile(local_join(target, "file1"))
            assert local_fs.isfile(local_join(target, "file2"))
            assert local_fs.isfile(local_join(target, "subfile1"))

            local_fs.rm(
                [
                    local_join(target, "file1"),
                    local_join(target, "file2"),
                    local_join(target, "subfile1"),
                ],
                recursive=True,
            )
            assert local_fs.ls(target) == []

    def test_get_list_of_files_to_new_directory(
        self,
        fs,
        fs_join,
        fs_bulk_operations_scenario_0,
        local_fs,
        local_join,
        local_target,
    ):
        # Copy scenario 2b
        source = fs_bulk_operations_scenario_0

        target = local_target
        local_fs.mkdir(target)

        source_files = [
            fs_join(source, "file1"),
            fs_join(source, "file2"),
            fs_join(source, "subdir", "subfile1"),
        ]

        fs.get(source_files, local_join(target, "newdir") + "/")  # Note trailing slash
        assert local_fs.isdir(local_join(target, "newdir"))
        assert local_fs.isfile(local_join(target, "newdir", "file1"))
        assert local_fs.isfile(local_join(target, "newdir", "file2"))
        assert local_fs.isfile(local_join(target, "newdir", "subfile1"))

    def test_get_directory_recursive(
        self, fs, fs_join, fs_path, local_fs, local_join, local_target
    ):
        # https://github.com/fsspec/filesystem_spec/issues/1062
        # Recursive cp/get/put of source directory into non-existent target directory.
        src = fs_join(fs_path, "src")
        src_file = fs_join(src, "file")
        fs.mkdir(src)
        fs.touch(src_file)

        target = local_target

        # get without slash
        assert not local_fs.exists(target)
        for loop in range(2):
            fs.get(src, target, recursive=True)
            assert local_fs.isdir(target)

            if loop == 0:
                assert local_fs.isfile(local_join(target, "file"))
                assert not local_fs.exists(local_join(target, "src"))
            else:
                assert local_fs.isfile(local_join(target, "file"))
                assert local_fs.isdir(local_join(target, "src"))
                assert local_fs.isfile(local_join(target, "src", "file"))

        local_fs.rm(target, recursive=True)

        # get with slash
        assert not local_fs.exists(target)
        for loop in range(2):
            fs.get(src + "/", target, recursive=True)
            assert local_fs.isdir(target)
            assert local_fs.isfile(local_join(target, "file"))
            assert not local_fs.exists(local_join(target, "src"))

    def test_get_directory_without_files_with_same_name_prefix(
        self,
        fs,
        fs_join,
        local_fs,
        local_join,
        local_target,
        fs_dir_and_file_with_same_name_prefix,
    ):
        # Create the test dirs
        source = fs_dir_and_file_with_same_name_prefix
        target = local_target

        # Test without glob
        fs.get(fs_join(source, "subdir"), target, recursive=True)

        assert local_fs.isfile(local_join(target, "subfile.txt"))
        assert not local_fs.isfile(local_join(target, "subdir.txt"))

        local_fs.rm([local_join(target, "subfile.txt")])
        assert local_fs.ls(target) == []

        # Test with glob
        fs.get(fs_join(source, "subdir*"), target, recursive=True)

        assert local_fs.isdir(local_join(target, "subdir"))
        assert local_fs.isfile(local_join(target, "subdir", "subfile.txt"))
        assert local_fs.isfile(local_join(target, "subdir.txt"))

    def test_get_with_source_and_destination_as_list(
        self,
        fs,
        fs_join,
        local_fs,
        local_join,
        local_target,
        fs_10_files_with_hashed_names,
    ):
        # Create the test dir
        source = fs_10_files_with_hashed_names
        target = local_target

        # Create list of files for source and destination
        source_files = []
        destination_files = []
        for i in range(10):
            hashed_i = md5(str(i).encode("utf-8")).hexdigest()
            source_files.append(fs_join(source, f"{hashed_i}.txt"))
            destination_files.append(
                make_path_posix(local_join(target, f"{hashed_i}.txt"))
            )

        # Copy and assert order was kept
        fs.get(rpath=source_files, lpath=destination_files)

        for i in range(10):
            file_content = local_fs.cat(destination_files[i]).decode("utf-8")
            assert file_content == str(i)
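Note how AbstractGetTests passes every expected local path through make_path_posix before comparing with local_fs.find: find reports posix-style paths even on Windows, so the expectations must be normalized the same way. A quick illustration of the idea (the Windows-style output shown is an assumption of typical behavior, not taken from this diff):

from fsspec.implementations.local import make_path_posix

# Already-posix paths pass through unchanged.
print(make_path_posix("/tmp/data/file.txt"))  # /tmp/data/file.txt

# Backslash-separated Windows paths are rewritten with forward slashes,
# so they compare equal to what LocalFileSystem.find() reports.
print(make_path_posix("C:\\data\\file.txt"))  # e.g. C:/data/file.txt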
lib/python3.11/site-packages/fsspec/tests/abstract/put.py
ADDED
@@ -0,0 +1,577 @@
from hashlib import md5
from itertools import product

import pytest

from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS


class AbstractPutTests:
    def test_put_file_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_target,
        local_join,
        local_bulk_operations_scenario_0,
        supports_empty_directories,
    ):
        # Copy scenario 1a
        source = local_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            fs.touch(fs_join(target, "dummy"))
        assert fs.isdir(target)

        target_file2 = fs_join(target, "file2")
        target_subfile1 = fs_join(target, "subfile1")

        # Copy from source directory
        fs.put(local_join(source, "file2"), target)
        assert fs.isfile(target_file2)

        # Copy from sub directory
        fs.put(local_join(source, "subdir", "subfile1"), target)
        assert fs.isfile(target_subfile1)

        # Remove copied files
        fs.rm([target_file2, target_subfile1])
        assert not fs.exists(target_file2)
        assert not fs.exists(target_subfile1)

        # Repeat with trailing slash on target
        fs.put(local_join(source, "file2"), target + "/")
        assert fs.isdir(target)
        assert fs.isfile(target_file2)

        fs.put(local_join(source, "subdir", "subfile1"), target + "/")
        assert fs.isfile(target_subfile1)

    def test_put_file_to_new_directory(
        self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
    ):
        # Copy scenario 1b
        source = local_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        fs.put(
            local_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
        )  # Note trailing slash
        assert fs.isdir(target)
        assert fs.isdir(fs_join(target, "newdir"))
        assert fs.isfile(fs_join(target, "newdir", "subfile1"))

    def test_put_file_to_file_in_existing_directory(
        self,
        fs,
        fs_join,
        fs_target,
        local_join,
        supports_empty_directories,
        local_bulk_operations_scenario_0,
    ):
        # Copy scenario 1c
        source = local_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            fs.touch(fs_join(target, "dummy"))
        assert fs.isdir(target)

        fs.put(local_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
        assert fs.isfile(fs_join(target, "newfile"))

    def test_put_file_to_file_in_new_directory(
        self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
    ):
        # Copy scenario 1d
        source = local_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        fs.put(
            local_join(source, "subdir", "subfile1"),
            fs_join(target, "newdir", "newfile"),
        )
        assert fs.isdir(fs_join(target, "newdir"))
        assert fs.isfile(fs_join(target, "newdir", "newfile"))

    def test_put_directory_to_existing_directory(
        self,
        fs,
        fs_join,
        fs_target,
        local_bulk_operations_scenario_0,
        supports_empty_directories,
    ):
        # Copy scenario 1e
        source = local_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            dummy = fs_join(target, "dummy")
            fs.touch(dummy)
        assert fs.isdir(target)

        for source_slash, target_slash in zip([False, True], [False, True]):
            s = fs_join(source, "subdir")
            if source_slash:
                s += "/"
            t = target + "/" if target_slash else target

            # Without recursive does nothing
            fs.put(s, t)
            assert fs.ls(target) == ([] if supports_empty_directories else [dummy])

            # With recursive
            fs.put(s, t, recursive=True)
            if source_slash:
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert fs.isdir(fs_join(target, "nesteddir"))
                assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [
                        fs_join(target, "subfile1"),
                        fs_join(target, "subfile2"),
                        fs_join(target, "nesteddir"),
                    ],
                    recursive=True,
                )
            else:
                assert fs.isdir(fs_join(target, "subdir"))
                assert fs.isfile(fs_join(target, "subdir", "subfile1"))
                assert fs.isfile(fs_join(target, "subdir", "subfile2"))
                assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
                assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))

                fs.rm(fs_join(target, "subdir"), recursive=True)
            assert fs.ls(target) == ([] if supports_empty_directories else [dummy])

            # Limit recursive by maxdepth
            fs.put(s, t, recursive=True, maxdepth=1)
            if source_slash:
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert not fs.exists(fs_join(target, "nesteddir"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [
                        fs_join(target, "subfile1"),
                        fs_join(target, "subfile2"),
                    ],
                    recursive=True,
                )
            else:
                assert fs.isdir(fs_join(target, "subdir"))
                assert fs.isfile(fs_join(target, "subdir", "subfile1"))
                assert fs.isfile(fs_join(target, "subdir", "subfile2"))
                assert not fs.exists(fs_join(target, "subdir", "nesteddir"))

                fs.rm(fs_join(target, "subdir"), recursive=True)
            assert fs.ls(target) == ([] if supports_empty_directories else [dummy])

    def test_put_directory_to_new_directory(
        self,
        fs,
        fs_join,
        fs_target,
        local_bulk_operations_scenario_0,
|
| 193 |
+
supports_empty_directories,
|
| 194 |
+
):
|
| 195 |
+
# Copy scenario 1f
|
| 196 |
+
source = local_bulk_operations_scenario_0
|
| 197 |
+
|
| 198 |
+
target = fs_target
|
| 199 |
+
fs.mkdir(target)
|
| 200 |
+
|
| 201 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
| 202 |
+
s = fs_join(source, "subdir")
|
| 203 |
+
if source_slash:
|
| 204 |
+
s += "/"
|
| 205 |
+
t = fs_join(target, "newdir")
|
| 206 |
+
if target_slash:
|
| 207 |
+
t += "/"
|
| 208 |
+
|
| 209 |
+
# Without recursive does nothing
|
| 210 |
+
fs.put(s, t)
|
| 211 |
+
if supports_empty_directories:
|
| 212 |
+
assert fs.ls(target) == []
|
| 213 |
+
else:
|
| 214 |
+
with pytest.raises(FileNotFoundError):
|
| 215 |
+
fs.ls(target)
|
| 216 |
+
|
| 217 |
+
# With recursive
|
| 218 |
+
fs.put(s, t, recursive=True)
|
| 219 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
| 220 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
| 221 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
| 222 |
+
assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
|
| 223 |
+
assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
| 224 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
| 225 |
+
|
| 226 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
| 227 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
| 228 |
+
|
| 229 |
+
# Limit recursive by maxdepth
|
| 230 |
+
fs.put(s, t, recursive=True, maxdepth=1)
|
| 231 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
| 232 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
| 233 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
| 234 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
| 235 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
| 236 |
+
|
| 237 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
| 238 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
| 239 |
+
|
| 240 |
+
def test_put_glob_to_existing_directory(
|
| 241 |
+
self,
|
| 242 |
+
fs,
|
| 243 |
+
fs_join,
|
| 244 |
+
fs_target,
|
| 245 |
+
local_join,
|
| 246 |
+
supports_empty_directories,
|
| 247 |
+
local_bulk_operations_scenario_0,
|
| 248 |
+
):
|
| 249 |
+
# Copy scenario 1g
|
| 250 |
+
source = local_bulk_operations_scenario_0
|
| 251 |
+
|
| 252 |
+
target = fs_target
|
| 253 |
+
fs.mkdir(target)
|
| 254 |
+
if not supports_empty_directories:
|
| 255 |
+
# Force target directory to exist by adding a dummy file
|
| 256 |
+
dummy = fs_join(target, "dummy")
|
| 257 |
+
fs.touch(dummy)
|
| 258 |
+
assert fs.isdir(target)
|
| 259 |
+
|
| 260 |
+
for target_slash in [False, True]:
|
| 261 |
+
t = target + "/" if target_slash else target
|
| 262 |
+
|
| 263 |
+
# Without recursive
|
| 264 |
+
fs.put(local_join(source, "subdir", "*"), t)
|
| 265 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
| 266 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
| 267 |
+
assert not fs.isdir(fs_join(target, "nesteddir"))
|
| 268 |
+
assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
|
| 269 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
| 270 |
+
|
| 271 |
+
fs.rm(
|
| 272 |
+
[
|
| 273 |
+
fs_join(target, "subfile1"),
|
| 274 |
+
fs_join(target, "subfile2"),
|
| 275 |
+
],
|
| 276 |
+
recursive=True,
|
| 277 |
+
)
|
| 278 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
| 279 |
+
|
| 280 |
+
# With recursive
|
| 281 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
| 282 |
+
fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
|
| 283 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
| 284 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
| 285 |
+
assert fs.isdir(fs_join(target, "nesteddir"))
|
| 286 |
+
assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
|
| 287 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
| 288 |
+
|
| 289 |
+
fs.rm(
|
| 290 |
+
[
|
| 291 |
+
fs_join(target, "subfile1"),
|
| 292 |
+
fs_join(target, "subfile2"),
|
| 293 |
+
fs_join(target, "nesteddir"),
|
| 294 |
+
],
|
| 295 |
+
recursive=True,
|
| 296 |
+
)
|
| 297 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
| 298 |
+
|
| 299 |
+
# Limit recursive by maxdepth
|
| 300 |
+
fs.put(
|
| 301 |
+
local_join(source, "subdir", glob),
|
| 302 |
+
t,
|
| 303 |
+
recursive=recursive,
|
| 304 |
+
maxdepth=1,
|
| 305 |
+
)
|
| 306 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
| 307 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
| 308 |
+
assert not fs.exists(fs_join(target, "nesteddir"))
|
| 309 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
| 310 |
+
|
| 311 |
+
fs.rm(
|
| 312 |
+
[
|
| 313 |
+
fs_join(target, "subfile1"),
|
| 314 |
+
fs_join(target, "subfile2"),
|
| 315 |
+
],
|
| 316 |
+
recursive=True,
|
| 317 |
+
)
|
| 318 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
| 319 |
+
|
| 320 |
+
def test_put_glob_to_new_directory(
|
| 321 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
| 322 |
+
):
|
| 323 |
+
# Copy scenario 1h
|
| 324 |
+
source = local_bulk_operations_scenario_0
|
| 325 |
+
|
| 326 |
+
target = fs_target
|
| 327 |
+
fs.mkdir(target)
|
| 328 |
+
|
| 329 |
+
for target_slash in [False, True]:
|
| 330 |
+
t = fs_join(target, "newdir")
|
| 331 |
+
if target_slash:
|
| 332 |
+
t += "/"
|
| 333 |
+
|
| 334 |
+
# Without recursive
|
| 335 |
+
fs.put(local_join(source, "subdir", "*"), t)
|
| 336 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
| 337 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
| 338 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
| 339 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
| 340 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
| 341 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
| 342 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
| 343 |
+
|
| 344 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
| 345 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
| 346 |
+
|
| 347 |
+
# With recursive
|
| 348 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
| 349 |
+
fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
|
| 350 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
| 351 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
| 352 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
| 353 |
+
assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
|
| 354 |
+
assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
| 355 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
| 356 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
| 357 |
+
|
| 358 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
| 359 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
| 360 |
+
|
| 361 |
+
# Limit recursive by maxdepth
|
| 362 |
+
fs.put(
|
| 363 |
+
local_join(source, "subdir", glob),
|
| 364 |
+
t,
|
| 365 |
+
recursive=recursive,
|
| 366 |
+
maxdepth=1,
|
| 367 |
+
)
|
| 368 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
| 369 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
| 370 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
| 371 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
| 372 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
| 373 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
| 374 |
+
|
| 375 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
| 376 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
| 377 |
+
|
| 378 |
+
@pytest.mark.parametrize(
|
| 379 |
+
GLOB_EDGE_CASES_TESTS["argnames"],
|
| 380 |
+
GLOB_EDGE_CASES_TESTS["argvalues"],
|
| 381 |
+
)
|
| 382 |
+
def test_put_glob_edge_cases(
|
| 383 |
+
self,
|
| 384 |
+
path,
|
| 385 |
+
recursive,
|
| 386 |
+
maxdepth,
|
| 387 |
+
expected,
|
| 388 |
+
fs,
|
| 389 |
+
fs_join,
|
| 390 |
+
fs_target,
|
| 391 |
+
local_glob_edge_cases_files,
|
| 392 |
+
local_join,
|
| 393 |
+
fs_sanitize_path,
|
| 394 |
+
):
|
| 395 |
+
# Copy scenario 1g
|
| 396 |
+
source = local_glob_edge_cases_files
|
| 397 |
+
|
| 398 |
+
target = fs_target
|
| 399 |
+
|
| 400 |
+
for new_dir, target_slash in product([True, False], [True, False]):
|
| 401 |
+
fs.mkdir(target)
|
| 402 |
+
|
| 403 |
+
t = fs_join(target, "newdir") if new_dir else target
|
| 404 |
+
t = t + "/" if target_slash else t
|
| 405 |
+
|
| 406 |
+
fs.put(local_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
|
| 407 |
+
|
| 408 |
+
output = fs.find(target)
|
| 409 |
+
if new_dir:
|
| 410 |
+
prefixed_expected = [
|
| 411 |
+
fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
|
| 412 |
+
]
|
| 413 |
+
else:
|
| 414 |
+
prefixed_expected = [
|
| 415 |
+
fs_sanitize_path(fs_join(target, p)) for p in expected
|
| 416 |
+
]
|
| 417 |
+
assert sorted(output) == sorted(prefixed_expected)
|
| 418 |
+
|
| 419 |
+
try:
|
| 420 |
+
fs.rm(target, recursive=True)
|
| 421 |
+
except FileNotFoundError:
|
| 422 |
+
pass
|
| 423 |
+
|
| 424 |
+
def test_put_list_of_files_to_existing_directory(
|
| 425 |
+
self,
|
| 426 |
+
fs,
|
| 427 |
+
fs_join,
|
| 428 |
+
fs_target,
|
| 429 |
+
local_join,
|
| 430 |
+
local_bulk_operations_scenario_0,
|
| 431 |
+
supports_empty_directories,
|
| 432 |
+
):
|
| 433 |
+
# Copy scenario 2a
|
| 434 |
+
source = local_bulk_operations_scenario_0
|
| 435 |
+
|
| 436 |
+
target = fs_target
|
| 437 |
+
fs.mkdir(target)
|
| 438 |
+
if not supports_empty_directories:
|
| 439 |
+
# Force target directory to exist by adding a dummy file
|
| 440 |
+
dummy = fs_join(target, "dummy")
|
| 441 |
+
fs.touch(dummy)
|
| 442 |
+
assert fs.isdir(target)
|
| 443 |
+
|
| 444 |
+
source_files = [
|
| 445 |
+
local_join(source, "file1"),
|
| 446 |
+
local_join(source, "file2"),
|
| 447 |
+
local_join(source, "subdir", "subfile1"),
|
| 448 |
+
]
|
| 449 |
+
|
| 450 |
+
for target_slash in [False, True]:
|
| 451 |
+
t = target + "/" if target_slash else target
|
| 452 |
+
|
| 453 |
+
fs.put(source_files, t)
|
| 454 |
+
assert fs.isfile(fs_join(target, "file1"))
|
| 455 |
+
assert fs.isfile(fs_join(target, "file2"))
|
| 456 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
| 457 |
+
|
| 458 |
+
fs.rm(
|
| 459 |
+
[
|
| 460 |
+
fs_join(target, "file1"),
|
| 461 |
+
fs_join(target, "file2"),
|
| 462 |
+
fs_join(target, "subfile1"),
|
| 463 |
+
],
|
| 464 |
+
recursive=True,
|
| 465 |
+
)
|
| 466 |
+
assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
|
| 467 |
+
|
| 468 |
+
def test_put_list_of_files_to_new_directory(
|
| 469 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
| 470 |
+
):
|
| 471 |
+
# Copy scenario 2b
|
| 472 |
+
source = local_bulk_operations_scenario_0
|
| 473 |
+
|
| 474 |
+
target = fs_target
|
| 475 |
+
fs.mkdir(target)
|
| 476 |
+
|
| 477 |
+
source_files = [
|
| 478 |
+
local_join(source, "file1"),
|
| 479 |
+
local_join(source, "file2"),
|
| 480 |
+
local_join(source, "subdir", "subfile1"),
|
| 481 |
+
]
|
| 482 |
+
|
| 483 |
+
fs.put(source_files, fs_join(target, "newdir") + "/") # Note trailing slash
|
| 484 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
| 485 |
+
assert fs.isfile(fs_join(target, "newdir", "file1"))
|
| 486 |
+
assert fs.isfile(fs_join(target, "newdir", "file2"))
|
| 487 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
| 488 |
+
|
| 489 |
+
def test_put_directory_recursive(
|
| 490 |
+
self, fs, fs_join, fs_target, local_fs, local_join, local_path
|
| 491 |
+
):
|
| 492 |
+
# https://github.com/fsspec/filesystem_spec/issues/1062
|
| 493 |
+
# Recursive cp/get/put of source directory into non-existent target directory.
|
| 494 |
+
src = local_join(local_path, "src")
|
| 495 |
+
src_file = local_join(src, "file")
|
| 496 |
+
local_fs.mkdir(src)
|
| 497 |
+
local_fs.touch(src_file)
|
| 498 |
+
|
| 499 |
+
target = fs_target
|
| 500 |
+
|
| 501 |
+
# put without slash
|
| 502 |
+
assert not fs.exists(target)
|
| 503 |
+
for loop in range(2):
|
| 504 |
+
fs.put(src, target, recursive=True)
|
| 505 |
+
assert fs.isdir(target)
|
| 506 |
+
|
| 507 |
+
if loop == 0:
|
| 508 |
+
assert fs.isfile(fs_join(target, "file"))
|
| 509 |
+
assert not fs.exists(fs_join(target, "src"))
|
| 510 |
+
else:
|
| 511 |
+
assert fs.isfile(fs_join(target, "file"))
|
| 512 |
+
assert fs.isdir(fs_join(target, "src"))
|
| 513 |
+
assert fs.isfile(fs_join(target, "src", "file"))
|
| 514 |
+
|
| 515 |
+
fs.rm(target, recursive=True)
|
| 516 |
+
|
| 517 |
+
# put with slash
|
| 518 |
+
assert not fs.exists(target)
|
| 519 |
+
for loop in range(2):
|
| 520 |
+
fs.put(src + "/", target, recursive=True)
|
| 521 |
+
assert fs.isdir(target)
|
| 522 |
+
assert fs.isfile(fs_join(target, "file"))
|
| 523 |
+
assert not fs.exists(fs_join(target, "src"))
|
| 524 |
+
|
| 525 |
+
def test_put_directory_without_files_with_same_name_prefix(
|
| 526 |
+
self,
|
| 527 |
+
fs,
|
| 528 |
+
fs_join,
|
| 529 |
+
fs_target,
|
| 530 |
+
local_join,
|
| 531 |
+
local_dir_and_file_with_same_name_prefix,
|
| 532 |
+
supports_empty_directories,
|
| 533 |
+
):
|
| 534 |
+
# Create the test dirs
|
| 535 |
+
source = local_dir_and_file_with_same_name_prefix
|
| 536 |
+
target = fs_target
|
| 537 |
+
|
| 538 |
+
# Test without glob
|
| 539 |
+
fs.put(local_join(source, "subdir"), fs_target, recursive=True)
|
| 540 |
+
|
| 541 |
+
assert fs.isfile(fs_join(fs_target, "subfile.txt"))
|
| 542 |
+
assert not fs.isfile(fs_join(fs_target, "subdir.txt"))
|
| 543 |
+
|
| 544 |
+
fs.rm([fs_join(target, "subfile.txt")])
|
| 545 |
+
if supports_empty_directories:
|
| 546 |
+
assert fs.ls(target) == []
|
| 547 |
+
else:
|
| 548 |
+
assert not fs.exists(target)
|
| 549 |
+
|
| 550 |
+
# Test with glob
|
| 551 |
+
fs.put(local_join(source, "subdir*"), fs_target, recursive=True)
|
| 552 |
+
|
| 553 |
+
assert fs.isdir(fs_join(fs_target, "subdir"))
|
| 554 |
+
assert fs.isfile(fs_join(fs_target, "subdir", "subfile.txt"))
|
| 555 |
+
assert fs.isfile(fs_join(fs_target, "subdir.txt"))
|
| 556 |
+
|
| 557 |
+
def test_copy_with_source_and_destination_as_list(
|
| 558 |
+
self, fs, fs_target, fs_join, local_join, local_10_files_with_hashed_names
|
| 559 |
+
):
|
| 560 |
+
# Create the test dir
|
| 561 |
+
source = local_10_files_with_hashed_names
|
| 562 |
+
target = fs_target
|
| 563 |
+
|
| 564 |
+
# Create list of files for source and destination
|
| 565 |
+
source_files = []
|
| 566 |
+
destination_files = []
|
| 567 |
+
for i in range(10):
|
| 568 |
+
hashed_i = md5(str(i).encode("utf-8")).hexdigest()
|
| 569 |
+
source_files.append(local_join(source, f"{hashed_i}.txt"))
|
| 570 |
+
destination_files.append(fs_join(target, f"{hashed_i}.txt"))
|
| 571 |
+
|
| 572 |
+
# Copy and assert order was kept
|
| 573 |
+
fs.put(lpath=source_files, rpath=destination_files)
|
| 574 |
+
|
| 575 |
+
for i in range(10):
|
| 576 |
+
file_content = fs.cat(destination_files[i]).decode("utf-8")
|
| 577 |
+
assert file_content == str(i)
|
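These tests are written against pytest fixtures (fs, fs_join, fs_target, local_join, and the scenario fixtures) rather than any concrete filesystem. A minimal sketch of how an implementation might reuse the suite, where MyFsFixtures is a hypothetical subclass of fsspec.tests.abstract.AbstractFixtures supplying those fixtures:

import fsspec.tests.abstract as abstract


class MyFsFixtures(abstract.AbstractFixtures):
    # hypothetical: override the fixtures here to return the filesystem
    # under test, its path-join helper, a fresh target path, etc.
    ...


class TestMyFsPut(abstract.AbstractPutTests, MyFsFixtures):
    pass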
lib/python3.11/site-packages/fsspec/transaction.py
ADDED
@@ -0,0 +1,85 @@
from collections import deque


class Transaction:
    """Filesystem transaction write context

    Gathers files for deferred commit or discard, so that several write
    operations can be finalized semi-atomically. This works by having this
    instance as the ``.transaction`` attribute of the given filesystem
    """

    def __init__(self, fs):
        """
        Parameters
        ----------
        fs: FileSystem instance
        """
        self.fs = fs
        self.files = deque()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """End transaction and commit, if exit is not due to exception"""
        # only commit if there was no exception
        self.complete(commit=exc_type is None)
        self.fs._intrans = False
        self.fs._transaction = None

    def start(self):
        """Start a transaction on this FileSystem"""
        self.files = deque()  # clean up after previous failed completions
        self.fs._intrans = True

    def complete(self, commit=True):
        """Finish transaction: commit or discard all deferred files"""
        while self.files:
            f = self.files.popleft()
            if commit:
                f.commit()
            else:
                f.discard()
        self.fs._intrans = False


class FileActor:
    def __init__(self):
        self.files = []

    def commit(self):
        for f in self.files:
            f.commit()
        self.files.clear()

    def discard(self):
        for f in self.files:
            f.discard()
        self.files.clear()

    def append(self, f):
        self.files.append(f)


class DaskTransaction(Transaction):
    def __init__(self, fs):
        """
        Parameters
        ----------
        fs: FileSystem instance
        """
        import distributed

        super().__init__(fs)
        client = distributed.default_client()
        self.files = client.submit(FileActor, actor=True).result()

    def complete(self, commit=True):
        """Finish transaction: commit or discard all deferred files"""
        if commit:
            self.files.commit().result()
        else:
            self.files.discard().result()
        self.fs._intrans = False
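A minimal usage sketch for the Transaction class above, assuming a filesystem whose file objects implement commit()/discard() (the in-memory implementation is one such):

import fsspec

fs = fsspec.filesystem("memory")

with fs.transaction:  # Transaction.__enter__ -> start()
    with fs.open("/staging/a.txt", "wb") as f:
        f.write(b"first")
    with fs.open("/staging/b.txt", "wb") as f:
        f.write(b"second")
# a clean exit calls complete(commit=True) and commits both deferred
# files; an exception inside the block would discard them instead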
lib/python3.11/site-packages/fsspec/utils.py
ADDED
@@ -0,0 +1,742 @@
from __future__ import annotations

import contextlib
import logging
import math
import os
import pathlib
import re
import sys
import tempfile
from functools import partial
from hashlib import md5
from importlib.metadata import version
from typing import (
    IO,
    TYPE_CHECKING,
    Any,
    Callable,
    Iterable,
    Iterator,
    Sequence,
    TypeVar,
)
from urllib.parse import urlsplit

if TYPE_CHECKING:
    from typing_extensions import TypeGuard

    from fsspec.spec import AbstractFileSystem


DEFAULT_BLOCK_SIZE = 5 * 2**20

T = TypeVar("T")


def infer_storage_options(
    urlpath: str, inherit_storage_options: dict[str, Any] | None = None
) -> dict[str, Any]:
    """Infer storage options from URL path and merge it with existing storage
    options.

    Parameters
    ----------
    urlpath: str or unicode
        Either local absolute file path or URL (hdfs://namenode:8020/file.csv)
    inherit_storage_options: dict (optional)
        Its contents will get merged with the inferred information from the
        given path

    Returns
    -------
    Storage options dict.

    Examples
    --------
    >>> infer_storage_options('/mnt/datasets/test.csv')  # doctest: +SKIP
    {"protocol": "file", "path", "/mnt/datasets/test.csv"}
    >>> infer_storage_options(
    ...     'hdfs://username:pwd@node:123/mnt/datasets/test.csv?q=1',
    ...     inherit_storage_options={'extra': 'value'},
    ... )  # doctest: +SKIP
    {"protocol": "hdfs", "username": "username", "password": "pwd",
    "host": "node", "port": 123, "path": "/mnt/datasets/test.csv",
    "url_query": "q=1", "extra": "value"}
    """
    # Handle Windows paths including disk name in this special case
    if (
        re.match(r"^[a-zA-Z]:[\\/]", urlpath)
        or re.match(r"^[a-zA-Z0-9]+://", urlpath) is None
    ):
        return {"protocol": "file", "path": urlpath}

    parsed_path = urlsplit(urlpath)
    protocol = parsed_path.scheme or "file"
    if parsed_path.fragment:
        path = "#".join([parsed_path.path, parsed_path.fragment])
    else:
        path = parsed_path.path
    if protocol == "file":
        # Special case parsing file protocol URL on Windows according to:
        # https://msdn.microsoft.com/en-us/library/jj710207.aspx
        windows_path = re.match(r"^/([a-zA-Z])[:|]([\\/].*)$", path)
        if windows_path:
            path = "%s:%s" % windows_path.groups()

    if protocol in ["http", "https"]:
        # for HTTP, we don't want to parse, as requests will anyway
        return {"protocol": protocol, "path": urlpath}

    options: dict[str, Any] = {"protocol": protocol, "path": path}

    if parsed_path.netloc:
        # Parse `hostname` from netloc manually because `parsed_path.hostname`
        # lowercases the hostname which is not always desirable (e.g. in S3):
        # https://github.com/dask/dask/issues/1417
        options["host"] = parsed_path.netloc.rsplit("@", 1)[-1].rsplit(":", 1)[0]

        if protocol in ("s3", "s3a", "gcs", "gs"):
            options["path"] = options["host"] + options["path"]
        else:
            options["host"] = options["host"]
        if parsed_path.port:
            options["port"] = parsed_path.port
        if parsed_path.username:
            options["username"] = parsed_path.username
        if parsed_path.password:
            options["password"] = parsed_path.password

    if parsed_path.query:
        options["url_query"] = parsed_path.query
    if parsed_path.fragment:
        options["url_fragment"] = parsed_path.fragment

    if inherit_storage_options:
        update_storage_options(options, inherit_storage_options)

    return options


def update_storage_options(
    options: dict[str, Any], inherited: dict[str, Any] | None = None
) -> None:
    if not inherited:
        inherited = {}
    collisions = set(options) & set(inherited)
    if collisions:
        for collision in collisions:
            if options.get(collision) != inherited.get(collision):
                raise KeyError(
                    f"Collision between inferred and specified storage "
                    f"option:\n{collision}"
                )
    options.update(inherited)


# Compression extensions registered via fsspec.compression.register_compression
compressions: dict[str, str] = {}


def infer_compression(filename: str) -> str | None:
    """Infer compression, if available, from filename.

    Infer a named compression type, if registered and available, from filename
    extension. This includes builtin (gz, bz2, zip) compressions, as well as
    optional compressions. See fsspec.compression.register_compression.
    """
    extension = os.path.splitext(filename)[-1].strip(".").lower()
    if extension in compressions:
        return compressions[extension]
    return None


def build_name_function(max_int: float) -> Callable[[int], str]:
    """Returns a function that receives a single integer
    and returns it as a string padded by enough zero characters
    to align with maximum possible integer

    >>> name_f = build_name_function(57)

    >>> name_f(7)
    '07'
    >>> name_f(31)
    '31'
    >>> build_name_function(1000)(42)
    '0042'
    >>> build_name_function(999)(42)
    '042'
    >>> build_name_function(0)(0)
    '0'
    """
    # handle corner cases max_int is 0 or exact power of 10
    max_int += 1e-8

    pad_length = int(math.ceil(math.log10(max_int)))

    def name_function(i: int) -> str:
        return str(i).zfill(pad_length)

    return name_function


def seek_delimiter(file: IO[bytes], delimiter: bytes, blocksize: int) -> bool:
    r"""Seek current file to file start, file end, or byte after delimiter seq.

    Seeks file to next chunk delimiter, where chunks are defined on file start,
    a delimiting sequence, and file end. Use file.tell() to see location afterwards.
    Note that file start is a valid split, so must be at offset > 0 to seek for
    delimiter.

    Parameters
    ----------
    file: a file
    delimiter: bytes
        a delimiter like ``b'\n'`` or message sentinel, matching file .read() type
    blocksize: int
        Number of bytes to read from the file at once.


    Returns
    -------
    Returns True if a delimiter was found, False if at file start or end.

    """

    if file.tell() == 0:
        # beginning-of-file, return without seek
        return False

    # Interface is for binary IO, with delimiter as bytes, but initialize last
    # with result of file.read to preserve compatibility with text IO.
    last: bytes | None = None
    while True:
        current = file.read(blocksize)
        if not current:
            # end-of-file without delimiter
            return False
        full = last + current if last else current
        try:
            if delimiter in full:
                i = full.index(delimiter)
                file.seek(file.tell() - (len(full) - i) + len(delimiter))
                return True
            elif len(current) < blocksize:
                # end-of-file without delimiter
                return False
        except (OSError, ValueError):
            pass
        last = full[-len(delimiter) :]


def read_block(
    f: IO[bytes],
    offset: int,
    length: int | None,
    delimiter: bytes | None = None,
    split_before: bool = False,
) -> bytes:
    """Read a block of bytes from a file

    Parameters
    ----------
    f: File
        Open file
    offset: int
        Byte offset to start read
    length: int
        Number of bytes to read, read through end of file if None
    delimiter: bytes (optional)
        Ensure reading starts and stops at delimiter bytestring
    split_before: bool (optional)
        Start/stop read *before* delimiter bytestring.


    If using the ``delimiter=`` keyword argument we ensure that the read
    starts and stops at delimiter boundaries that follow the locations
    ``offset`` and ``offset + length``. If ``offset`` is zero then we
    start at zero, regardless of delimiter. The bytestring returned WILL
    include the terminating delimiter string.

    Examples
    --------

    >>> from io import BytesIO  # doctest: +SKIP
    >>> f = BytesIO(b'Alice, 100\\nBob, 200\\nCharlie, 300')  # doctest: +SKIP
    >>> read_block(f, 0, 13)  # doctest: +SKIP
    b'Alice, 100\\nBo'

    >>> read_block(f, 0, 13, delimiter=b'\\n')  # doctest: +SKIP
    b'Alice, 100\\nBob, 200\\n'

    >>> read_block(f, 10, 10, delimiter=b'\\n')  # doctest: +SKIP
    b'Bob, 200\\nCharlie, 300'
    """
    if delimiter:
        f.seek(offset)
        found_start_delim = seek_delimiter(f, delimiter, 2**16)
        if length is None:
            return f.read()
        start = f.tell()
        length -= start - offset

        f.seek(start + length)
        found_end_delim = seek_delimiter(f, delimiter, 2**16)
        end = f.tell()

        # Adjust split location to before delimiter iff seek found the
        # delimiter sequence, not start or end of file.
        if found_start_delim and split_before:
            start -= len(delimiter)

        if found_end_delim and split_before:
            end -= len(delimiter)

        offset = start
        length = end - start

    f.seek(offset)

    # TODO: allow length to be None and read to the end of the file?
    assert length is not None
    b = f.read(length)
    return b


def tokenize(*args: Any, **kwargs: Any) -> str:
    """Deterministic token

    (modified from dask.base)

    >>> tokenize([1, 2, '3'])
    '9d71491b50023b06fc76928e6eddb952'

    >>> tokenize('Hello') == tokenize('Hello')
    True
    """
    if kwargs:
        args += (kwargs,)
    try:
        h = md5(str(args).encode())
    except ValueError:
        # FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380
        h = md5(str(args).encode(), usedforsecurity=False)
    return h.hexdigest()


def stringify_path(filepath: str | os.PathLike[str] | pathlib.Path) -> str:
    """Attempt to convert a path-like object to a string.

    Parameters
    ----------
    filepath: object to be converted

    Returns
    -------
    filepath_str: maybe a string version of the object

    Notes
    -----
    Objects supporting the fspath protocol are coerced according to its
    __fspath__ method.

    For backwards compatibility with older Python version, pathlib.Path
    objects are specially coerced.

    Any other object is passed through unchanged, which includes bytes,
    strings, buffers, or anything else that's not even path-like.
    """
    if isinstance(filepath, str):
        return filepath
    elif hasattr(filepath, "__fspath__"):
        return filepath.__fspath__()
    elif isinstance(filepath, pathlib.Path):
        return str(filepath)
    elif hasattr(filepath, "path"):
        return filepath.path
    else:
        return filepath  # type: ignore[return-value]


def make_instance(
    cls: Callable[..., T], args: Sequence[Any], kwargs: dict[str, Any]
) -> T:
    inst = cls(*args, **kwargs)
    inst._determine_worker()  # type: ignore[attr-defined]
    return inst


def common_prefix(paths: Iterable[str]) -> str:
    """For a list of paths, find the shortest prefix common to all"""
    parts = [p.split("/") for p in paths]
    lmax = min(len(p) for p in parts)
    end = 0
    for i in range(lmax):
        end = all(p[i] == parts[0][i] for p in parts)
        if not end:
            break
    i += end
    return "/".join(parts[0][:i])


def other_paths(
    paths: list[str],
    path2: str | list[str],
    exists: bool = False,
    flatten: bool = False,
) -> list[str]:
    """In bulk file operations, construct a new file tree from a list of files

    Parameters
    ----------
    paths: list of str
        The input file tree
    path2: str or list of str
        Root to construct the new list in. If this is already a list of str, we just
        assert it has the right number of elements.
    exists: bool (optional)
        For a str destination, it is already exists (and is a dir), files should
        end up inside.
    flatten: bool (optional)
        Whether to flatten the input directory tree structure so that the output files
        are in the same directory.

    Returns
    -------
    list of str
    """

    if isinstance(path2, str):
        path2 = path2.rstrip("/")

        if flatten:
            path2 = ["/".join((path2, p.split("/")[-1])) for p in paths]
        else:
            cp = common_prefix(paths)
            if exists:
                cp = cp.rsplit("/", 1)[0]
            if not cp and all(not s.startswith("/") for s in paths):
                path2 = ["/".join([path2, p]) for p in paths]
            else:
                path2 = [p.replace(cp, path2, 1) for p in paths]
    else:
        assert len(paths) == len(path2)
    return path2


def is_exception(obj: Any) -> bool:
    return isinstance(obj, BaseException)


def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
    for attr in ["read", "close", "tell"]:
        if not hasattr(f, attr):
            return False
    return True


def get_protocol(url: str) -> str:
    url = stringify_path(url)
    parts = re.split(r"(\:\:|\://)", url, 1)
    if len(parts) > 1:
        return parts[0]
    return "file"


def can_be_local(path: str) -> bool:
    """Can the given URL be used with open_local?"""
    from fsspec import get_filesystem_class

    try:
        return getattr(get_filesystem_class(get_protocol(path)), "local_file", False)
    except (ValueError, ImportError):
        # not in registry or import failed
        return False


def get_package_version_without_import(name: str) -> str | None:
    """For given package name, try to find the version without importing it

    Import and package.__version__ is still the backup here, so an import
    *might* happen.

    Returns either the version string, or None if the package
    or the version was not readily found.
    """
    if name in sys.modules:
        mod = sys.modules[name]
        if hasattr(mod, "__version__"):
            return mod.__version__
    try:
        return version(name)
    except:  # noqa: E722
        pass
    try:
        import importlib

        mod = importlib.import_module(name)
        return mod.__version__
    except (ImportError, AttributeError):
        return None


def setup_logging(
    logger: logging.Logger | None = None,
    logger_name: str | None = None,
    level: str = "DEBUG",
    clear: bool = True,
) -> logging.Logger:
    if logger is None and logger_name is None:
        raise ValueError("Provide either logger object or logger name")
    logger = logger or logging.getLogger(logger_name)
    handle = logging.StreamHandler()
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s -- %(message)s"
    )
    handle.setFormatter(formatter)
    if clear:
        logger.handlers.clear()
    logger.addHandler(handle)
    logger.setLevel(level)
    return logger


def _unstrip_protocol(name: str, fs: AbstractFileSystem) -> str:
    return fs.unstrip_protocol(name)


def mirror_from(
    origin_name: str, methods: Iterable[str]
) -> Callable[[type[T]], type[T]]:
    """Mirror attributes and methods from the given
    origin_name attribute of the instance to the
    decorated class"""

    def origin_getter(method: str, self: Any) -> Any:
        origin = getattr(self, origin_name)
        return getattr(origin, method)

    def wrapper(cls: type[T]) -> type[T]:
        for method in methods:
            wrapped_method = partial(origin_getter, method)
            setattr(cls, method, property(wrapped_method))
        return cls

    return wrapper


@contextlib.contextmanager
def nullcontext(obj: T) -> Iterator[T]:
    yield obj


def merge_offset_ranges(
    paths: list[str],
    starts: list[int] | int,
    ends: list[int] | int,
    max_gap: int = 0,
    max_block: int | None = None,
    sort: bool = True,
) -> tuple[list[str], list[int], list[int]]:
    """Merge adjacent byte-offset ranges when the inter-range
    gap is <= `max_gap`, and when the merged byte range does not
    exceed `max_block` (if specified). By default, this function
    will re-order the input paths and byte ranges to ensure sorted
    order. If the user can guarantee that the inputs are already
    sorted, passing `sort=False` will skip the re-ordering.
    """
    # Check input
    if not isinstance(paths, list):
        raise TypeError
    if not isinstance(starts, list):
        starts = [starts] * len(paths)
    if not isinstance(ends, list):
        ends = [ends] * len(paths)
    if len(starts) != len(paths) or len(ends) != len(paths):
        raise ValueError

    # Early Return
    if len(starts) <= 1:
        return paths, starts, ends

    starts = [s or 0 for s in starts]
    # Sort by paths and then ranges if `sort=True`
    if sort:
        paths, starts, ends = (
            list(v)
            for v in zip(
                *sorted(
                    zip(paths, starts, ends),
                )
            )
        )

    if paths:
        # Loop through the coupled `paths`, `starts`, and
        # `ends`, and merge adjacent blocks when appropriate
        new_paths = paths[:1]
        new_starts = starts[:1]
        new_ends = ends[:1]
        for i in range(1, len(paths)):
            if paths[i] == paths[i - 1] and new_ends[-1] is None:
                continue
            elif (
                paths[i] != paths[i - 1]
                or ((starts[i] - new_ends[-1]) > max_gap)
                or (max_block is not None and (ends[i] - new_starts[-1]) > max_block)
            ):
                # Cannot merge with previous block.
                # Add new `paths`, `starts`, and `ends` elements
                new_paths.append(paths[i])
                new_starts.append(starts[i])
                new_ends.append(ends[i])
            else:
                # Merge with previous block by updating the
                # last element of `ends`
                new_ends[-1] = ends[i]
        return new_paths, new_starts, new_ends

    # `paths` is empty. Just return input lists
    return paths, starts, ends


def file_size(filelike: IO[bytes]) -> int:
    """Find length of any open read-mode file-like"""
    pos = filelike.tell()
    try:
        return filelike.seek(0, 2)
    finally:
        filelike.seek(pos)


@contextlib.contextmanager
def atomic_write(path: str, mode: str = "wb"):
    """
    A context manager that opens a temporary file next to `path` and, on exit,
    replaces `path` with the temporary file, thereby updating `path`
    atomically.
    """
    fd, fn = tempfile.mkstemp(
        dir=os.path.dirname(path), prefix=os.path.basename(path) + "-"
    )
    try:
        with open(fd, mode) as fp:
            yield fp
    except BaseException:
        with contextlib.suppress(FileNotFoundError):
            os.unlink(fn)
        raise
    else:
        os.replace(fn, path)


def _translate(pat, STAR, QUESTION_MARK):
    # Copied from: https://github.com/python/cpython/pull/106703.
    res: list[str] = []
    add = res.append
    i, n = 0, len(pat)
    while i < n:
        c = pat[i]
        i = i + 1
        if c == "*":
            # compress consecutive `*` into one
            if (not res) or res[-1] is not STAR:
                add(STAR)
        elif c == "?":
            add(QUESTION_MARK)
        elif c == "[":
            j = i
            if j < n and pat[j] == "!":
                j = j + 1
            if j < n and pat[j] == "]":
                j = j + 1
            while j < n and pat[j] != "]":
                j = j + 1
            if j >= n:
                add("\\[")
            else:
                stuff = pat[i:j]
                if "-" not in stuff:
                    stuff = stuff.replace("\\", r"\\")
                else:
                    chunks = []
                    k = i + 2 if pat[i] == "!" else i + 1
                    while True:
                        k = pat.find("-", k, j)
                        if k < 0:
                            break
                        chunks.append(pat[i:k])
                        i = k + 1
                        k = k + 3
                    chunk = pat[i:j]
                    if chunk:
                        chunks.append(chunk)
                    else:
                        chunks[-1] += "-"
                    # Remove empty ranges -- invalid in RE.
                    for k in range(len(chunks) - 1, 0, -1):
                        if chunks[k - 1][-1] > chunks[k][0]:
                            chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:]
                            del chunks[k]
                    # Escape backslashes and hyphens for set difference (--).
                    # Hyphens that create ranges shouldn't be escaped.
                    stuff = "-".join(
                        s.replace("\\", r"\\").replace("-", r"\-") for s in chunks
                    )
                # Escape set operations (&&, ~~ and ||).
                stuff = re.sub(r"([&~|])", r"\\\1", stuff)
                i = j + 1
                if not stuff:
                    # Empty range: never match.
                    add("(?!)")
                elif stuff == "!":
                    # Negated empty range: match any character.
                    add(".")
                else:
                    if stuff[0] == "!":
                        stuff = "^" + stuff[1:]
                    elif stuff[0] in ("^", "["):
                        stuff = "\\" + stuff
                    add(f"[{stuff}]")
        else:
            add(re.escape(c))
    assert i == n
    return res


def glob_translate(pat):
    # Copied from: https://github.com/python/cpython/pull/106703.
    # The keyword parameters' values are fixed to:
    # recursive=True, include_hidden=True, seps=None
    """Translate a pathname with shell wildcards to a regular expression."""
    if os.path.altsep:
        seps = os.path.sep + os.path.altsep
    else:
        seps = os.path.sep
    escaped_seps = "".join(map(re.escape, seps))
    any_sep = f"[{escaped_seps}]" if len(seps) > 1 else escaped_seps
    not_sep = f"[^{escaped_seps}]"
    one_last_segment = f"{not_sep}+"
    one_segment = f"{one_last_segment}{any_sep}"
    any_segments = f"(?:.+{any_sep})?"
    any_last_segments = ".*"
    results = []
    parts = re.split(any_sep, pat)
    last_part_idx = len(parts) - 1
    for idx, part in enumerate(parts):
        if part == "*":
            results.append(one_segment if idx < last_part_idx else one_last_segment)
            continue
        if part == "**":
            results.append(any_segments if idx < last_part_idx else any_last_segments)
            continue
        elif "**" in part:
            raise ValueError(
                "Invalid pattern: '**' can only be an entire path component"
            )
        if part:
            results.extend(_translate(part, f"{not_sep}*", not_sep))
        if idx < last_part_idx:
            results.append(any_sep)
    res = "".join(results)
    return rf"(?s:{res})\Z"
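To make the merging rule in merge_offset_ranges concrete, a small worked example: two reads against the same file whose gap equals max_gap coalesce into one, while the read on the other file stays separate (file names are illustrative only):

from fsspec.utils import merge_offset_ranges

paths = ["data/a.parquet", "data/a.parquet", "data/b.parquet"]
starts = [0, 90, 0]
ends = [80, 200, 50]

# Gap between the a.parquet ranges is 90 - 80 = 10 <= max_gap, so they
# merge into a single (0, 200) read; b.parquet is untouched.
print(merge_offset_ranges(paths, starts, ends, max_gap=10))
# (['data/a.parquet', 'data/b.parquet'], [0, 0], [200, 50])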
lib/python3.11/site-packages/functorch/_C.cpython-311-darwin.so
ADDED
Binary file (332 kB)
lib/python3.11/site-packages/functorch/__init__.py
ADDED
@@ -0,0 +1,38 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import torch

from torch._functorch.deprecated import (
    combine_state_for_ensemble,
    functionalize,
    grad,
    grad_and_value,
    hessian,
    jacfwd,
    jacrev,
    jvp,
    make_functional,
    make_functional_with_buffers,
    vjp,
    vmap,
)

# utilities. Maybe these should go in their own namespace in the future?
from torch._functorch.make_functional import (
    FunctionalModule,
    FunctionalModuleWithBuffers,
)

# Top-level APIs. Please think carefully before adding something to the
# top-level namespace:
# - private helper functions should go into torch._functorch
# - very experimental things should go into functorch.experimental
# - compilation related things should go into functorch.compile

# Was never documented
from torch._functorch.python_key import make_fx

__version__ = torch.__version__
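The re-exported transforms compose in the usual way; a small sketch, taking per-example gradients of sin over a batch:

import torch
from functorch import grad, vmap

# d/dx sin(x) = cos(x); grad handles one scalar input, vmap maps it over
# a batch of inputs
x = torch.linspace(0, 1, 5)
cos_x = vmap(grad(torch.sin))(x)
assert torch.allclose(cos_x, torch.cos(x))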
lib/python3.11/site-packages/functorch/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (947 Bytes)
lib/python3.11/site-packages/functorch/_src/__init__.py
ADDED
File without changes
lib/python3.11/site-packages/functorch/_src/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (229 Bytes)
lib/python3.11/site-packages/functorch/_src/aot_autograd/__init__.py
ADDED
@@ -0,0 +1,8 @@
# This file has moved to under torch/_functorch. It is not public API.
# If you are not a PyTorch developer and you are relying on the following
# imports, please file an issue.
from torch._functorch.aot_autograd import (
    aot_autograd_decompositions,
    KNOWN_TYPES,
    PytreeThunk,
)
lib/python3.11/site-packages/functorch/_src/aot_autograd/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (417 Bytes)
lib/python3.11/site-packages/functorch/_src/eager_transforms/__init__.py
ADDED
@@ -0,0 +1,7 @@
# This file has moved to under torch/_functorch. It is not public API.
# If you are not a PyTorch developer and you are relying on the following
# imports, please file an issue.
from torch._functorch.eager_transforms import (
    _assert_wrapped_functional,
    _unwrap_functional_tensor,
)
lib/python3.11/site-packages/functorch/_src/eager_transforms/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (406 Bytes).
lib/python3.11/site-packages/functorch/_src/make_functional/__init__.py
ADDED
@@ -0,0 +1,4 @@
# This file has moved under torch/_functorch. It is not public API.
# If you are not a PyTorch developer and you are relying on the following
# imports, please file an issue.
from torch._functorch.make_functional import _swap_state
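The make_functional machinery this stub forwards to is what the deprecated functorch.make_functional returns; a short usage sketch (torch.func.functional_call is the modern replacement):

import torch
from functorch import make_functional  # deprecated shim re-exported above

model = torch.nn.Linear(3, 2)
fmodel, params = make_functional(model)   # fmodel is a FunctionalModule
out = fmodel(params, torch.randn(4, 3))   # parameters are passed explicitly
assert out.shape == (4, 2)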
lib/python3.11/site-packages/functorch/_src/make_functional/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (329 Bytes).
lib/python3.11/site-packages/functorch/_src/vmap/__init__.py
ADDED
@@ -0,0 +1,16 @@
# This file has moved under torch/_functorch. It is not public API.
# If you are not a PyTorch developer and you are relying on the following
# imports, please file an issue.
from torch._functorch.vmap import (
    _add_batch_dim,
    _broadcast_to_and_flatten,
    _create_batched_inputs,
    _get_name,
    _process_batched_inputs,
    _remove_batch_dim,
    _unwrap_batched,
    _validate_and_get_batch_size,
    Tensor,
    tree_flatten,
    tree_unflatten,
)
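This stub only re-exports private vmap internals; the public entry point for the same transform lives in torch.func. A minimal sketch:

import torch
from torch.func import vmap  # public API; the shim above re-exports privates

def dot(a, b):
    return (a * b).sum()

batched_dot = vmap(dot)                    # vectorize over the leading dim
x, y = torch.randn(8, 5), torch.randn(8, 5)
assert batched_dot(x, y).shape == (8,)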
lib/python3.11/site-packages/functorch/_src/vmap/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (705 Bytes).
lib/python3.11/site-packages/functorch/compile/__init__.py
ADDED
@@ -0,0 +1,31 @@
from torch._functorch import config
from torch._functorch.aot_autograd import (
    aot_function,
    aot_module,
    aot_module_simplified,
    compiled_function,
    compiled_module,
    get_aot_compilation_context,
    get_aot_graph_name,
    get_graph_being_compiled,
    make_boxed_compiler,
    make_boxed_func,
)
from torch._functorch.compilers import (
    debug_compile,
    default_decompositions,
    draw_graph_compile,
    memory_efficient_fusion,
    nnc_jit,
    nop,
    print_compile,
    ts_compile,
)
from torch._functorch.fx_minifier import minifier
from torch._functorch.partitioners import (
    default_partition,
    draw_graph,
    draw_joint_graph,
    min_cut_rematerialization_partition,
)
from torch._functorch.python_key import pythonkey_decompose
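Among these re-exports, aot_function traces forward and backward graphs ahead of time and hands each to a compiler callback; nop returns the traced graph unchanged, which makes it useful for inspection. A minimal sketch:

import torch
from functorch.compile import aot_function, nop

def f(x):
    return torch.sin(x).sum()

compiled = aot_function(f, fw_compiler=nop, bw_compiler=nop)
x = torch.randn(4, requires_grad=True)
compiled(x).backward()                 # runs the AOT-traced fw/bw graphs
assert torch.allclose(x.grad, torch.cos(x))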
lib/python3.11/site-packages/functorch/compile/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (1.47 kB).
lib/python3.11/site-packages/functorch/dim/__init__.py
ADDED
@@ -0,0 +1,179 @@
import dis
import inspect
from typing import Sequence, Union

import torch

import functorch._C
from functorch._C import dim as _C
from .tree_map import tree_flatten, tree_map
from .wrap_type import wrap_type

_C._patch_tensor_class()
dims, DimList, dimlists = _C.dims, _C.DimList, _C.dimlists


class DimensionMismatchError(Exception):
    pass


class DimensionBindError(Exception):
    pass


from . import op_properties

# use dict to avoid writing C++ bindings for set
pointwise = {t: True for t in op_properties.pointwise}

use_c = True
if not use_c:
    from . import reference


class _Tensor:
    # fast path around slow wrapping/unwrapping logic for simple queries used
    # by the implementation...

    @property
    def dims(self):
        return tuple(d for d in self._levels if isinstance(d, Dim))

    def dim(self):
        return self.ndim

    if use_c:
        __torch_function__ = classmethod(_C.__torch_function__)
        expand = _C._instancemethod(_C.expand)
    else:
        __torch_function__ = reference.__torch_function__
        expand = reference.expand

    index = _C._instancemethod(_C.index)

    def __repr__(self):
        tensor, levels, ndim = self._tensor, self._levels, self.ndim
        return f"{tensor}\nwith dims={tuple(l + ndim if isinstance(l, int) else l for l in levels)} sizes={tuple(tensor.size())}"


TensorLike = (_Tensor, torch.Tensor)


class Dim(_C.Dim, _Tensor):
    # note that _C.Dim comes before tensor because we want the Dim API for things like size to take precedence.
    # Tensor defines format, but we want to print Dims with special formatting
    __format__ = object.__format__


class Tensor(_Tensor, _C.Tensor):
    if not use_c:
        from_batched = staticmethod(_C.Tensor_from_batched)
    from_positional = staticmethod(_C.Tensor_from_positional)
    sum = _C._instancemethod(_C.Tensor_sum)


def cat(tensors, dim, new_dim):
    n = dims()
    return stack(tensors, n, dim).index([n, dim], new_dim)


if use_c:
    _wrap = _C._wrap

    def _def(name, *args, **kwargs):
        orig = getattr(torch.Tensor, name)
        setattr(_Tensor, name, _C._instancemethod(_wrap(orig, *args, **kwargs)))

    t__getitem__ = _C._instancemethod(_C.__getitem__)
    stack = _C.stack
    split = _C._instancemethod(_C.split)
else:
    _wrap, _def = reference._wrap, reference._def
    t__getitem__ = reference.t__getitem__
    stack = reference.stack
    split = reference.split

# note: there is no python reference
t__setitem__ = _C._instancemethod(_C.__setitem__)
# this is patched in the C API because otherwise torch.Tensor will
# no longer be considered a sequence and things will break
# torch.Tensor.__getitem__ = t__getitem__

_Tensor.__getitem__ = t__getitem__
# torch.Tensor.__setitem__ = t__setitem__
_Tensor.__setitem__ = t__setitem__

torch.Tensor.split = split
_Tensor.split = split
torch.Tensor.expand = _C._instancemethod(_C.expand)
torch.Tensor.index = _C._instancemethod(_C.index)
wrap_type(use_c, _Tensor, torch.Tensor, _Tensor.__torch_function__)
del _Tensor.ndim

if use_c:
    _Tensor.order = _C._instancemethod(_C.order)
else:
    _Tensor.order = reference.positional

_def("mean")
_def("sum")
_def("all")
_def("amax")
_def("amin")
_def("aminmax")
_def("any")
_def("count_nonzero")
_def("logsumexp")
_def("nanmean")
_def("nansum")
_def("prod")
_def("std", keepdim_offset=2)
_def("var", keepdim_offset=2)
_def("max", single_dim=True)
_def("min", single_dim=True)
_def("argmax", single_dim=True)
_def("argmin", single_dim=True)
_def("kthvalue", single_dim=True)
_def("median", single_dim=True)
_def("nanmedian", single_dim=True)
_def("mode", single_dim=True)
_def("sort", reduce=False)
_def("argsort", reduce=False)
_def("unbind", single_dim=True)
_def("chunk", dim_offset=1, reduce=False)
_def("cummax", single_dim=True, reduce=False)
_def("cummin", single_dim=True, reduce=False)
_def("cumprod", single_dim=True, reduce=False)
_def("cumprod_", single_dim=True, reduce=False)
_def("cumsum", single_dim=True, reduce=False)
_def("cumsum_", single_dim=True, reduce=False)
_def("logcumsumexp", single_dim=True, reduce=False)
_def("renorm", dim_offset=1, single_dim=True, reduce=False)
_def("softmax", single_dim=True, reduce=False)
softmax = _wrap(torch.nn.functional.softmax, single_dim=True, reduce=False)

# stuff to handle in the future, because they require special
# binding logic for dims
# cross
# diag_embed
# diagonal
# diagonal_scatter
# diff
# nanquantile
# quantile
# roll
# rot90
# topk (new dims on output)
# should these all be subsumed by inplace indexing?
# index_add_
# index_add
# index_copy
# index_copy_
# index_fill
# index_fill_
# index_select
# scatter
# scatter_
# scatter_add
# scatter_add_
# scatter_reduce
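This module is the first-class-dimensions ("torchdim") front end: dims() mints named dimension objects and the patched Tensor methods accept them in place of integer axes. A minimal sketch of a dim-indexed matrix multiply (requires the compiled functorch._C extension shipped alongside):

import torch
from functorch.dim import dims

i, j, k = dims()                      # names inferred from the assignment
A = torch.randn(3, 4)
B = torch.randn(4, 5)
C = (A[i, k] * B[k, j]).sum(k)        # contraction over the shared dim k
assert C.order(i, j).shape == (3, 5)  # order() returns a positional tensor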
lib/python3.11/site-packages/functorch/dim/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (7.99 kB).

lib/python3.11/site-packages/functorch/dim/__pycache__/batch_tensor.cpython-311.pyc
ADDED
Binary file (1.29 kB).

lib/python3.11/site-packages/functorch/dim/__pycache__/delayed_mul_tensor.cpython-311.pyc
ADDED
Binary file (5.61 kB).

lib/python3.11/site-packages/functorch/dim/__pycache__/dim.cpython-311.pyc
ADDED
Binary file (6.89 kB).

lib/python3.11/site-packages/functorch/dim/__pycache__/magic_trace.cpython-311.pyc
ADDED
Binary file (2.49 kB).

lib/python3.11/site-packages/functorch/dim/__pycache__/op_properties.cpython-311.pyc
ADDED
Binary file (12.1 kB).

lib/python3.11/site-packages/functorch/dim/__pycache__/reference.cpython-311.pyc
ADDED
Binary file (32.4 kB).

lib/python3.11/site-packages/functorch/dim/__pycache__/tree_map.cpython-311.pyc
ADDED
Binary file (803 Bytes).

lib/python3.11/site-packages/functorch/dim/__pycache__/wrap_type.cpython-311.pyc
ADDED
Binary file (2.55 kB).
lib/python3.11/site-packages/functorch/dim/batch_tensor.py
ADDED
@@ -0,0 +1,25 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from contextlib import contextmanager

from torch._C._functorch import _vmap_add_layers, _vmap_remove_layers

_enabled = False


@contextmanager
def _enable_layers(dims):
    global _enabled
    assert not _enabled
    input = sorted((d._level, d.size) for d in dims if not isinstance(d, int))
    n = len(input)
    try:
        _vmap_add_layers(input)
        _enabled = True
        yield
    finally:
        _enabled = False
        _vmap_remove_layers(n)
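_enable_layers pushes one vmap interpreter layer per first-class dim and pops them all afterwards; the finally block clears the guard flag before removing layers, so a failure inside the body cannot leave the flag stuck, and the assert forbids nesting. The same non-reentrant acquire/release shape as a self-contained toy:

from contextlib import contextmanager

_busy = False

@contextmanager
def toy_layers(levels):
    global _busy
    assert not _busy, "nesting is not allowed"
    try:
        _busy = True
        yield
    finally:
        _busy = False          # reset the guard first, then release
        print(f"removed {len(levels)} layers")

with toy_layers([(0, 4), (1, 8)]):
    print("batched region")    # prints, then "removed 2 layers" on exit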
lib/python3.11/site-packages/functorch/dim/delayed_mul_tensor.py
ADDED
@@ -0,0 +1,77 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import torch

from . import _Tensor, Tensor
from .reference import _dims, _enable_layers, llist, ltuple


class DelayedMulTensor(_Tensor):
    def __init__(self, lhs, rhs):
        self._lhs, self._rhs = lhs, rhs
        self._data = None
        self._levels_data = None
        self._has_device = lhs._has_device or rhs._has_device
        self._batchtensor_data = None
        self._tensor_data = None

    @property
    def _levels(self):
        if self._levels_data is None:
            levels = llist(self._lhs._levels)
            for l in self._rhs._levels:
                if l not in levels:
                    levels.append(l)
            self._levels_data = ltuple(levels)
        return self._levels_data

    @property
    def _batchtensor(self):
        if self._batchtensor_data is None:
            with _enable_layers(self._levels):
                print("bt multiply fallback")
                self._batchtensor_data = self._lhs._batchtensor * self._rhs._batchtensor
        return self._batchtensor_data

    @property
    def _tensor(self):
        if self._tensor_data is None:
            self._tensor_data = Tensor.from_batched(
                self._batchtensor, self._has_device
            )._tensor
        return self._tensor_data

    @property
    def ndim(self):
        return self._batchtensor.ndim

    @property
    def dims(self):
        return ltuple(super().dims)

    def sum(self, dim):
        dims = _dims(dim, 0, False, False)
        n = ord("a")
        all_levels = self._levels

        def to_char(d):
            return chr(n + all_levels.index(d))

        plhs, levelslhs = self._lhs._tensor, self._lhs._levels
        prhs, levelsrhs = self._rhs._tensor, self._rhs._levels
        new_dims = tuple(d for d in self.dims if d not in dims)
        new_levels = [l for l in self._levels if l not in dims]
        fmt = "".join(
            [
                *(to_char(d) for d in levelslhs),
                ",",
                *(to_char(d) for d in levelsrhs),
                "->",
                *(to_char(d) for d in new_levels),
            ]
        )
        result_data = torch.einsum(fmt, (plhs, prhs))
        return Tensor.from_positional(result_data, new_levels, True)
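DelayedMulTensor.sum maps each level to a letter and builds an einsum string, so the pointwise multiply and the following reduction run as a single contraction instead of materializing the product. The equivalent computation in plain torch, as a sketch:

import torch

a = torch.randn(4, 5)
b = torch.randn(4, 5)

eager = (a * b).sum(1)                   # materializes a*b, then reduces
fused = torch.einsum("ab,ab->a", a, b)   # the contraction sum() emits
assert torch.allclose(eager, fused, atol=1e-6)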
lib/python3.11/site-packages/functorch/dim/dim.py
ADDED
@@ -0,0 +1,110 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import dis
import inspect
from dataclasses import dataclass
from typing import Union

# NOTE: the stdlib imports above are added for self-containment; this module
# reads as a vestigial Python reference. Several names used below (DimList,
# DimensionBindError, _vmap_active_levels, _vmap_increment_nesting,
# _vmap_decrement_nesting, current_level) are never defined or imported here
# and would raise NameError if those code paths were actually hit.

_vmap_levels = []


@dataclass
class LevelInfo:
    level: int
    alive: bool = True


class Dim:
    def __init__(self, name: str, size: Union[None, int] = None):
        self.name = name
        self._size = None
        self._vmap_level = None
        if size is not None:
            self.size = size

    def __del__(self):
        if self._vmap_level is not None:
            _vmap_active_levels[self._vmap_stack].alive = False
            while (
                not _vmap_levels[-1].alive and current_level() == _vmap_levels[-1].level
            ):
                _vmap_decrement_nesting()
                _vmap_levels.pop()

    @property
    def size(self):
        assert self.is_bound
        return self._size

    @size.setter
    def size(self, size: int):
        if self._size is None:
            self._size = size
            self._vmap_level = _vmap_increment_nesting(size, "same")
            self._vmap_stack = len(_vmap_levels)
            _vmap_levels.append(LevelInfo(self._vmap_level))
        elif self._size != size:
            raise DimensionBindError(
                f"Dim '{self}' previously bound to a dimension of size {self._size} cannot bind to a dimension of size {size}"
            )

    @property
    def is_bound(self):
        return self._size is not None

    def __repr__(self):
        return self.name


def extract_name(inst):
    assert inst.opname == "STORE_FAST" or inst.opname == "STORE_NAME"
    return inst.argval


_cache = {}


def dims(lists=0):
    frame = inspect.currentframe()
    assert frame is not None
    calling_frame = frame.f_back
    assert calling_frame is not None
    code, lasti = calling_frame.f_code, calling_frame.f_lasti
    key = (code, lasti)
    if key not in _cache:
        first = lasti // 2 + 1
        instructions = list(dis.get_instructions(calling_frame.f_code))
        unpack = instructions[first]

        if unpack.opname == "STORE_FAST" or unpack.opname == "STORE_NAME":
            # just a single dim, not a list
            name = unpack.argval
            ctor = Dim if lists == 0 else DimList
            _cache[key] = lambda: ctor(name=name)
        else:
            assert unpack.opname == "UNPACK_SEQUENCE"
            ndims = unpack.argval
            names = tuple(
                extract_name(instructions[first + 1 + i]) for i in range(ndims)
            )
            first_list = len(names) - lists
            _cache[key] = lambda: tuple(
                Dim(n) if i < first_list else DimList(name=n)
                for i, n in enumerate(names)
            )
    return _cache[key]()


def _dim_set(positional, arg):
    def convert(a):
        if isinstance(a, Dim):
            return a
        else:
            assert isinstance(a, int)
            return positional[a]

    if arg is None:
        return positional
    elif not isinstance(arg, (Dim, int)):
        return tuple(convert(a) for a in arg)
    else:
        return (convert(arg),)
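dims() inspects the caller's bytecode to learn how many names sit on the left of the assignment, which is why `i, j = dims()` needs no count argument. A standalone sketch of the same trick (instruction-offset details vary across CPython versions, so treat this as illustrative, not as the shipped implementation):

import dis
import inspect

def assigned_names():
    # Look at the caller's instructions just past the current call: a bare
    # STORE means one target, UNPACK_SEQUENCE n means n unpacked names.
    frame = inspect.currentframe().f_back
    insts = [i for i in dis.get_instructions(frame.f_code) if i.offset > frame.f_lasti]
    if insts[0].opname in ("STORE_FAST", "STORE_NAME"):
        return (insts[0].argval,)
    assert insts[0].opname == "UNPACK_SEQUENCE"
    stores = [i for i in insts[1:] if i.opname in ("STORE_FAST", "STORE_NAME")]
    return tuple(i.argval for i in stores[: insts[0].argval])

x, y = assigned_names()
print(x, y)  # prints "x y": the function saw its own assignment targets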
lib/python3.11/site-packages/functorch/dim/magic_trace.py
ADDED
@@ -0,0 +1,42 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import os
import signal
import subprocess
from contextlib import contextmanager


@contextmanager
def magic_trace(output="trace.fxt", magic_trace_cache="/tmp/magic-trace"):
    pid = os.getpid()
    if not os.path.exists(magic_trace_cache):
        print(f"Downloading magic_trace to: {magic_trace_cache}")
        subprocess.run(
            [
                "wget",
                "-O",
                magic_trace_cache,
                "-q",
                "https://github.com/janestreet/magic-trace/releases/download/v1.0.2/magic-trace",
            ]
        )
        subprocess.run(["chmod", "+x", magic_trace_cache])
    args = [magic_trace_cache, "attach", "-pid", str(pid), "-o", output]
    p = subprocess.Popen(args, stderr=subprocess.PIPE, encoding="utf-8")
    while True:
        x = p.stderr.readline()
        print(x)
        if "Attached" in x:
            break
    try:
        yield
    finally:
        p.send_signal(signal.SIGINT)
        r = p.wait()
        print(p.stderr.read())
        p.stderr.close()
        if r != 0:
            raise ValueError(f"magic_trace exited abnormally: {r}")
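Usage is a with-block around the region to profile: the context manager attaches the magic-trace sampler to the current PID on entry and stops it with SIGINT on exit, leaving a trace file viewable in Perfetto. A sketch (Linux x86-64 only, since it downloads the prebuilt magic-trace binary; the workload is arbitrary):

import torch
from functorch.dim.magic_trace import magic_trace

def work():
    a = torch.randn(512, 512)
    for _ in range(100):
        a = (a @ a).clamp(-1, 1)

with magic_trace(output="matmul.fxt"):  # attaches to this PID, stops on exit
    work()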