Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +3 -0
- 17.CherryPimps.Give Me That Cock LIVE.mp4 +3 -0
- 17.Clips4Sale.Family Therapy.Let Daddy Help.mp4 +3 -0
- 17.RealityKings.Teaching Her To Suck Cock.mp4 +3 -0
- venv/lib/python3.12/site-packages/certifi/__pycache__/__init__.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/certifi/__pycache__/__main__.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/certifi/__pycache__/core.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/filelock/__pycache__/__init__.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/filelock/__pycache__/_api.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/filelock/__pycache__/_error.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/filelock/__pycache__/_soft.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/filelock/__pycache__/_util.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/filelock/__pycache__/_windows.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/filelock/__pycache__/asyncio.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/filelock/__pycache__/version.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/hf_xet-1.1.5.dist-info/licenses/LICENSE +201 -0
- venv/lib/python3.12/site-packages/huggingface_hub/utils/_cache_assets.py +135 -0
- venv/lib/python3.12/site-packages/huggingface_hub/utils/_cache_manager.py +896 -0
- venv/lib/python3.12/site-packages/huggingface_hub/utils/_datetime.py +67 -0
- venv/lib/python3.12/site-packages/huggingface_hub/utils/_deprecation.py +136 -0
- venv/lib/python3.12/site-packages/huggingface_hub/utils/_experimental.py +68 -0
- venv/lib/python3.12/site-packages/huggingface_hub/utils/_fixes.py +133 -0
- venv/lib/python3.12/site-packages/huggingface_hub/utils/_http.py +637 -0
- venv/lib/python3.12/site-packages/huggingface_hub/utils/_pagination.py +52 -0
- venv/lib/python3.12/site-packages/huggingface_hub/utils/_paths.py +141 -0
- venv/lib/python3.12/site-packages/huggingface_hub/utils/_safetensors.py +111 -0
- venv/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py +226 -0
- venv/lib/python3.12/site-packages/huggingface_hub/utils/_xet.py +188 -0
- venv/lib/python3.12/site-packages/huggingface_hub/utils/insecure_hashlib.py +38 -0
- venv/lib/python3.12/site-packages/huggingface_hub/utils/logging.py +188 -0
- venv/lib/python3.12/site-packages/huggingface_hub/utils/sha.py +64 -0
- venv/lib/python3.12/site-packages/idna/__pycache__/core.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/idna/__pycache__/idnadata.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/packaging-25.0.dist-info/licenses/LICENSE +3 -0
- venv/lib/python3.12/site-packages/packaging-25.0.dist-info/licenses/LICENSE.APACHE +177 -0
- venv/lib/python3.12/site-packages/packaging-25.0.dist-info/licenses/LICENSE.BSD +23 -0
- venv/lib/python3.12/site-packages/pip/__pycache__/__init__.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/pip/__pycache__/__main__.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/pip/__pycache__/__pip-runner__.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/pip/_internal/__init__.py +18 -0
- venv/lib/python3.12/site-packages/pip/_internal/__pycache__/__init__.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/pip/_internal/__pycache__/build_env.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/pip/_internal/__pycache__/cache.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/pip/_internal/__pycache__/configuration.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/pip/_internal/__pycache__/exceptions.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/pip/_internal/__pycache__/main.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/pip/_internal/__pycache__/pyproject.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/pip/_internal/__pycache__/self_outdated_check.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/pip/_internal/__pycache__/wheel_builder.cpython-312.pyc +0 -0
- venv/lib/python3.12/site-packages/pip/_internal/build_env.py +311 -0
.gitattributes
CHANGED
@@ -136,3 +136,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
136 |
19.Deeper.Dare[[:space:]]You.101231.mp4 filter=lfs diff=lfs merge=lfs -text
|
137 |
17.Mofos.Spinner[[:space:]]Sucks[[:space:]]Cock[[:space:]]for[[:space:]]Fame.mp4 filter=lfs diff=lfs merge=lfs -text
|
138 |
18.Mofos.A[[:space:]]Sneaky[[:space:]]Threesome[[:space:]]Situation.mp4 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
136 |
19.Deeper.Dare[[:space:]]You.101231.mp4 filter=lfs diff=lfs merge=lfs -text
|
137 |
17.Mofos.Spinner[[:space:]]Sucks[[:space:]]Cock[[:space:]]for[[:space:]]Fame.mp4 filter=lfs diff=lfs merge=lfs -text
|
138 |
18.Mofos.A[[:space:]]Sneaky[[:space:]]Threesome[[:space:]]Situation.mp4 filter=lfs diff=lfs merge=lfs -text
|
139 |
+
17.Clips4Sale.Family[[:space:]]Therapy.Let[[:space:]]Daddy[[:space:]]Help.mp4 filter=lfs diff=lfs merge=lfs -text
|
140 |
+
17.CherryPimps.Give[[:space:]]Me[[:space:]]That[[:space:]]Cock[[:space:]]LIVE.mp4 filter=lfs diff=lfs merge=lfs -text
|
141 |
+
17.RealityKings.Teaching[[:space:]]Her[[:space:]]To[[:space:]]Suck[[:space:]]Cock.mp4 filter=lfs diff=lfs merge=lfs -text
|
17.CherryPimps.Give Me That Cock LIVE.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0801b315fee505e3bf8263b012cdbab11bb8eae47f3a921b6695951cbeab4fa7
|
3 |
+
size 9009317124
|
17.Clips4Sale.Family Therapy.Let Daddy Help.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17607983880ebc83ade9468ffa6a25940422058756d187cc60b13908d29cd67a
|
3 |
+
size 159946715
|
17.RealityKings.Teaching Her To Suck Cock.mp4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:752a736937fc5e9bee0a332ed57170cbf4aa42462d587e57d39510388cc98dde
|
3 |
+
size 2386235157
|
venv/lib/python3.12/site-packages/certifi/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (304 Bytes). View file
|
|
venv/lib/python3.12/site-packages/certifi/__pycache__/__main__.cpython-312.pyc
ADDED
Binary file (619 Bytes). View file
|
|
venv/lib/python3.12/site-packages/certifi/__pycache__/core.cpython-312.pyc
ADDED
Binary file (2.05 kB). View file
|
|
venv/lib/python3.12/site-packages/filelock/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (1.57 kB). View file
|
|
venv/lib/python3.12/site-packages/filelock/__pycache__/_api.cpython-312.pyc
ADDED
Binary file (16.6 kB). View file
|
|
venv/lib/python3.12/site-packages/filelock/__pycache__/_error.cpython-312.pyc
ADDED
Binary file (1.75 kB). View file
|
|
venv/lib/python3.12/site-packages/filelock/__pycache__/_soft.cpython-312.pyc
ADDED
Binary file (2.46 kB). View file
|
|
venv/lib/python3.12/site-packages/filelock/__pycache__/_util.cpython-312.pyc
ADDED
Binary file (2 kB). View file
|
|
venv/lib/python3.12/site-packages/filelock/__pycache__/_windows.cpython-312.pyc
ADDED
Binary file (3.26 kB). View file
|
|
venv/lib/python3.12/site-packages/filelock/__pycache__/asyncio.cpython-312.pyc
ADDED
Binary file (15.6 kB). View file
|
|
venv/lib/python3.12/site-packages/filelock/__pycache__/version.cpython-312.pyc
ADDED
Binary file (639 Bytes). View file
|
|
venv/lib/python3.12/site-packages/hf_xet-1.1.5.dist-info/licenses/LICENSE
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Apache License
|
2 |
+
Version 2.0, January 2004
|
3 |
+
http://www.apache.org/licenses/
|
4 |
+
|
5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6 |
+
|
7 |
+
1. Definitions.
|
8 |
+
|
9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
11 |
+
|
12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13 |
+
the copyright owner that is granting the License.
|
14 |
+
|
15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
16 |
+
other entities that control, are controlled by, or are under common
|
17 |
+
control with that entity. For the purposes of this definition,
|
18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
19 |
+
direction or management of such entity, whether by contract or
|
20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22 |
+
|
23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24 |
+
exercising permissions granted by this License.
|
25 |
+
|
26 |
+
"Source" form shall mean the preferred form for making modifications,
|
27 |
+
including but not limited to software source code, documentation
|
28 |
+
source, and configuration files.
|
29 |
+
|
30 |
+
"Object" form shall mean any form resulting from mechanical
|
31 |
+
transformation or translation of a Source form, including but
|
32 |
+
not limited to compiled object code, generated documentation,
|
33 |
+
and conversions to other media types.
|
34 |
+
|
35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
36 |
+
Object form, made available under the License, as indicated by a
|
37 |
+
copyright notice that is included in or attached to the work
|
38 |
+
(an example is provided in the Appendix below).
|
39 |
+
|
40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41 |
+
form, that is based on (or derived from) the Work and for which the
|
42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
44 |
+
of this License, Derivative Works shall not include works that remain
|
45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46 |
+
the Work and Derivative Works thereof.
|
47 |
+
|
48 |
+
"Contribution" shall mean any work of authorship, including
|
49 |
+
the original version of the Work and any modifications or additions
|
50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
54 |
+
means any form of electronic, verbal, or written communication sent
|
55 |
+
to the Licensor or its representatives, including but not limited to
|
56 |
+
communication on electronic mailing lists, source code control systems,
|
57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
59 |
+
excluding communication that is conspicuously marked or otherwise
|
60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
61 |
+
|
62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
64 |
+
subsequently incorporated within the Work.
|
65 |
+
|
66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
71 |
+
Work and such Derivative Works in Source or Object form.
|
72 |
+
|
73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76 |
+
(except as stated in this section) patent license to make, have made,
|
77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78 |
+
where such license applies only to those patent claims licensable
|
79 |
+
by such Contributor that are necessarily infringed by their
|
80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
82 |
+
institute patent litigation against any entity (including a
|
83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84 |
+
or a Contribution incorporated within the Work constitutes direct
|
85 |
+
or contributory patent infringement, then any patent licenses
|
86 |
+
granted to You under this License for that Work shall terminate
|
87 |
+
as of the date such litigation is filed.
|
88 |
+
|
89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
90 |
+
Work or Derivative Works thereof in any medium, with or without
|
91 |
+
modifications, and in Source or Object form, provided that You
|
92 |
+
meet the following conditions:
|
93 |
+
|
94 |
+
(a) You must give any other recipients of the Work or
|
95 |
+
Derivative Works a copy of this License; and
|
96 |
+
|
97 |
+
(b) You must cause any modified files to carry prominent notices
|
98 |
+
stating that You changed the files; and
|
99 |
+
|
100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
101 |
+
that You distribute, all copyright, patent, trademark, and
|
102 |
+
attribution notices from the Source form of the Work,
|
103 |
+
excluding those notices that do not pertain to any part of
|
104 |
+
the Derivative Works; and
|
105 |
+
|
106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107 |
+
distribution, then any Derivative Works that You distribute must
|
108 |
+
include a readable copy of the attribution notices contained
|
109 |
+
within such NOTICE file, excluding those notices that do not
|
110 |
+
pertain to any part of the Derivative Works, in at least one
|
111 |
+
of the following places: within a NOTICE text file distributed
|
112 |
+
as part of the Derivative Works; within the Source form or
|
113 |
+
documentation, if provided along with the Derivative Works; or,
|
114 |
+
within a display generated by the Derivative Works, if and
|
115 |
+
wherever such third-party notices normally appear. The contents
|
116 |
+
of the NOTICE file are for informational purposes only and
|
117 |
+
do not modify the License. You may add Your own attribution
|
118 |
+
notices within Derivative Works that You distribute, alongside
|
119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
120 |
+
that such additional attribution notices cannot be construed
|
121 |
+
as modifying the License.
|
122 |
+
|
123 |
+
You may add Your own copyright statement to Your modifications and
|
124 |
+
may provide additional or different license terms and conditions
|
125 |
+
for use, reproduction, or distribution of Your modifications, or
|
126 |
+
for any such Derivative Works as a whole, provided Your use,
|
127 |
+
reproduction, and distribution of the Work otherwise complies with
|
128 |
+
the conditions stated in this License.
|
129 |
+
|
130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
132 |
+
by You to the Licensor shall be under the terms and conditions of
|
133 |
+
this License, without any additional terms or conditions.
|
134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135 |
+
the terms of any separate license agreement you may have executed
|
136 |
+
with Licensor regarding such Contributions.
|
137 |
+
|
138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
140 |
+
except as required for reasonable and customary use in describing the
|
141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
142 |
+
|
143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144 |
+
agreed to in writing, Licensor provides the Work (and each
|
145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147 |
+
implied, including, without limitation, any warranties or conditions
|
148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150 |
+
appropriateness of using or redistributing the Work and assume any
|
151 |
+
risks associated with Your exercise of permissions under this License.
|
152 |
+
|
153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
154 |
+
whether in tort (including negligence), contract, or otherwise,
|
155 |
+
unless required by applicable law (such as deliberate and grossly
|
156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157 |
+
liable to You for damages, including any direct, indirect, special,
|
158 |
+
incidental, or consequential damages of any character arising as a
|
159 |
+
result of this License or out of the use or inability to use the
|
160 |
+
Work (including but not limited to damages for loss of goodwill,
|
161 |
+
work stoppage, computer failure or malfunction, or any and all
|
162 |
+
other commercial damages or losses), even if such Contributor
|
163 |
+
has been advised of the possibility of such damages.
|
164 |
+
|
165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168 |
+
or other liability obligations and/or rights consistent with this
|
169 |
+
License. However, in accepting such obligations, You may act only
|
170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171 |
+
of any other Contributor, and only if You agree to indemnify,
|
172 |
+
defend, and hold each Contributor harmless for any liability
|
173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
174 |
+
of your accepting any such warranty or additional liability.
|
175 |
+
|
176 |
+
END OF TERMS AND CONDITIONS
|
177 |
+
|
178 |
+
APPENDIX: How to apply the Apache License to your work.
|
179 |
+
|
180 |
+
To apply the Apache License to your work, attach the following
|
181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
182 |
+
replaced with your own identifying information. (Don't include
|
183 |
+
the brackets!) The text should be enclosed in the appropriate
|
184 |
+
comment syntax for the file format. We also recommend that a
|
185 |
+
file or class name and description of purpose be included on the
|
186 |
+
same "printed page" as the copyright notice for easier
|
187 |
+
identification within third-party archives.
|
188 |
+
|
189 |
+
Copyright [yyyy] [name of copyright owner]
|
190 |
+
|
191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
192 |
+
you may not use this file except in compliance with the License.
|
193 |
+
You may obtain a copy of the License at
|
194 |
+
|
195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
196 |
+
|
197 |
+
Unless required by applicable law or agreed to in writing, software
|
198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200 |
+
See the License for the specific language governing permissions and
|
201 |
+
limitations under the License.
|
venv/lib/python3.12/site-packages/huggingface_hub/utils/_cache_assets.py
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2019-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
from pathlib import Path
|
16 |
+
from typing import Union
|
17 |
+
|
18 |
+
from ..constants import HF_ASSETS_CACHE
|
19 |
+
|
20 |
+
|
21 |
+
def cached_assets_path(
|
22 |
+
library_name: str,
|
23 |
+
namespace: str = "default",
|
24 |
+
subfolder: str = "default",
|
25 |
+
*,
|
26 |
+
assets_dir: Union[str, Path, None] = None,
|
27 |
+
):
|
28 |
+
"""Return a folder path to cache arbitrary files.
|
29 |
+
|
30 |
+
`huggingface_hub` provides a canonical folder path to store assets. This is the
|
31 |
+
recommended way to integrate cache in a downstream library as it will benefit from
|
32 |
+
the builtins tools to scan and delete the cache properly.
|
33 |
+
|
34 |
+
The distinction is made between files cached from the Hub and assets. Files from the
|
35 |
+
Hub are cached in a git-aware manner and entirely managed by `huggingface_hub`. See
|
36 |
+
[related documentation](https://huggingface.co/docs/huggingface_hub/how-to-cache).
|
37 |
+
All other files that a downstream library caches are considered to be "assets"
|
38 |
+
(files downloaded from external sources, extracted from a .tar archive, preprocessed
|
39 |
+
for training,...).
|
40 |
+
|
41 |
+
Once the folder path is generated, it is guaranteed to exist and to be a directory.
|
42 |
+
The path is based on 3 levels of depth: the library name, a namespace and a
|
43 |
+
subfolder. Those 3 levels grants flexibility while allowing `huggingface_hub` to
|
44 |
+
expect folders when scanning/deleting parts of the assets cache. Within a library,
|
45 |
+
it is expected that all namespaces share the same subset of subfolder names but this
|
46 |
+
is not a mandatory rule. The downstream library has then full control on which file
|
47 |
+
structure to adopt within its cache. Namespace and subfolder are optional (would
|
48 |
+
default to a `"default/"` subfolder) but library name is mandatory as we want every
|
49 |
+
downstream library to manage its own cache.
|
50 |
+
|
51 |
+
Expected tree:
|
52 |
+
```text
|
53 |
+
assets/
|
54 |
+
└── datasets/
|
55 |
+
│ ├── SQuAD/
|
56 |
+
│ │ ├── downloaded/
|
57 |
+
│ │ ├── extracted/
|
58 |
+
│ │ └── processed/
|
59 |
+
│ ├── Helsinki-NLP--tatoeba_mt/
|
60 |
+
│ ├── downloaded/
|
61 |
+
│ ├── extracted/
|
62 |
+
│ └── processed/
|
63 |
+
└── transformers/
|
64 |
+
├── default/
|
65 |
+
│ ├── something/
|
66 |
+
├── bert-base-cased/
|
67 |
+
│ ├── default/
|
68 |
+
│ └── training/
|
69 |
+
hub/
|
70 |
+
└── models--julien-c--EsperBERTo-small/
|
71 |
+
├── blobs/
|
72 |
+
│ ├── (...)
|
73 |
+
│ ├── (...)
|
74 |
+
├── refs/
|
75 |
+
│ └── (...)
|
76 |
+
└── [ 128] snapshots/
|
77 |
+
├── 2439f60ef33a0d46d85da5001d52aeda5b00ce9f/
|
78 |
+
│ ├── (...)
|
79 |
+
└── bbc77c8132af1cc5cf678da3f1ddf2de43606d48/
|
80 |
+
└── (...)
|
81 |
+
```
|
82 |
+
|
83 |
+
|
84 |
+
Args:
|
85 |
+
library_name (`str`):
|
86 |
+
Name of the library that will manage the cache folder. Example: `"dataset"`.
|
87 |
+
namespace (`str`, *optional*, defaults to "default"):
|
88 |
+
Namespace to which the data belongs. Example: `"SQuAD"`.
|
89 |
+
subfolder (`str`, *optional*, defaults to "default"):
|
90 |
+
Subfolder in which the data will be stored. Example: `extracted`.
|
91 |
+
assets_dir (`str`, `Path`, *optional*):
|
92 |
+
Path to the folder where assets are cached. This must not be the same folder
|
93 |
+
where Hub files are cached. Defaults to `HF_HOME / "assets"` if not provided.
|
94 |
+
Can also be set with `HF_ASSETS_CACHE` environment variable.
|
95 |
+
|
96 |
+
Returns:
|
97 |
+
Path to the cache folder (`Path`).
|
98 |
+
|
99 |
+
Example:
|
100 |
+
```py
|
101 |
+
>>> from huggingface_hub import cached_assets_path
|
102 |
+
|
103 |
+
>>> cached_assets_path(library_name="datasets", namespace="SQuAD", subfolder="download")
|
104 |
+
PosixPath('/home/wauplin/.cache/huggingface/extra/datasets/SQuAD/download')
|
105 |
+
|
106 |
+
>>> cached_assets_path(library_name="datasets", namespace="SQuAD", subfolder="extracted")
|
107 |
+
PosixPath('/home/wauplin/.cache/huggingface/extra/datasets/SQuAD/extracted')
|
108 |
+
|
109 |
+
>>> cached_assets_path(library_name="datasets", namespace="Helsinki-NLP/tatoeba_mt")
|
110 |
+
PosixPath('/home/wauplin/.cache/huggingface/extra/datasets/Helsinki-NLP--tatoeba_mt/default')
|
111 |
+
|
112 |
+
>>> cached_assets_path(library_name="datasets", assets_dir="/tmp/tmp123456")
|
113 |
+
PosixPath('/tmp/tmp123456/datasets/default/default')
|
114 |
+
```
|
115 |
+
"""
|
116 |
+
# Resolve assets_dir
|
117 |
+
if assets_dir is None:
|
118 |
+
assets_dir = HF_ASSETS_CACHE
|
119 |
+
assets_dir = Path(assets_dir).expanduser().resolve()
|
120 |
+
|
121 |
+
# Avoid names that could create path issues
|
122 |
+
for part in (" ", "/", "\\"):
|
123 |
+
library_name = library_name.replace(part, "--")
|
124 |
+
namespace = namespace.replace(part, "--")
|
125 |
+
subfolder = subfolder.replace(part, "--")
|
126 |
+
|
127 |
+
# Path to subfolder is created
|
128 |
+
path = assets_dir / library_name / namespace / subfolder
|
129 |
+
try:
|
130 |
+
path.mkdir(exist_ok=True, parents=True)
|
131 |
+
except (FileExistsError, NotADirectoryError):
|
132 |
+
raise ValueError(f"Corrupted assets folder: cannot create directory because of an existing file ({path}).")
|
133 |
+
|
134 |
+
# Return
|
135 |
+
return path
|
venv/lib/python3.12/site-packages/huggingface_hub/utils/_cache_manager.py
ADDED
@@ -0,0 +1,896 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contains utilities to manage the HF cache directory."""
|
16 |
+
|
17 |
+
import os
|
18 |
+
import shutil
|
19 |
+
import time
|
20 |
+
from collections import defaultdict
|
21 |
+
from dataclasses import dataclass
|
22 |
+
from pathlib import Path
|
23 |
+
from typing import Dict, FrozenSet, List, Literal, Optional, Set, Union
|
24 |
+
|
25 |
+
from huggingface_hub.errors import CacheNotFound, CorruptedCacheException
|
26 |
+
|
27 |
+
from ..commands._cli_utils import tabulate
|
28 |
+
from ..constants import HF_HUB_CACHE
|
29 |
+
from . import logging
|
30 |
+
|
31 |
+
|
32 |
+
logger = logging.get_logger(__name__)
|
33 |
+
|
34 |
+
REPO_TYPE_T = Literal["model", "dataset", "space"]
|
35 |
+
|
36 |
+
# List of OS-created helper files that need to be ignored
|
37 |
+
FILES_TO_IGNORE = [".DS_Store"]
|
38 |
+
|
39 |
+
|
40 |
+
@dataclass(frozen=True)
class CachedFileInfo:
    """Immutable description of a single file stored in the cache.

    Args:
        file_name (`str`):
            Name of the file. Example: `config.json`.
        file_path (`Path`):
            Path of the file inside the `snapshots` directory. This path is a
            symlink pointing to a blob in the `blobs` folder.
        blob_path (`Path`):
            Path of the blob file. Equivalent to `file_path.resolve()`.
        size_on_disk (`int`):
            Size of the blob file, in bytes.
        blob_last_accessed (`float`):
            Timestamp of the last access to the blob file (from any revision).
        blob_last_modified (`float`):
            Timestamp of the last modification/creation of the blob file.

    <Tip warning={true}>

    The reliability of `blob_last_accessed` and `blob_last_modified` depends on the
    OS you are using. See the
    [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
    for more details.

    </Tip>
    """

    file_name: str
    file_path: Path
    blob_path: Path
    size_on_disk: int

    blob_last_accessed: float
    blob_last_modified: float

    @property
    def blob_last_accessed_str(self) -> str:
        """
        (property) Human-readable version of `blob_last_accessed`.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.blob_last_accessed)

    @property
    def blob_last_modified_str(self) -> str:
        """
        (property) Human-readable version of `blob_last_modified`.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.blob_last_modified)

    @property
    def size_on_disk_str(self) -> str:
        """
        (property) Human-readable version of `size_on_disk`.

        Example: "42.2K".
        """
        return _format_size(self.size_on_disk)
|
105 |
+
|
106 |
+
|
107 |
+
@dataclass(frozen=True)
class CachedRevisionInfo:
    """Immutable description of one cached revision of a repo.

    A revision corresponds to a folder in the `snapshots` folder and is populated with
    the exact tree structure as the repo on the Hub but contains only symlinks. A
    revision can be either referenced by 1 or more `refs` or be "detached" (no refs).

    Args:
        commit_hash (`str`):
            Hash of the revision (unique).
            Example: `"9338f7b671827df886678df2bdd7cc7b4f36dffd"`.
        snapshot_path (`Path`):
            Path to the revision directory in the `snapshots` folder. It contains the
            exact tree structure as the repo on the Hub.
        files: (`FrozenSet[CachedFileInfo]`):
            Set of [`~CachedFileInfo`] describing all files contained in the snapshot.
        refs (`FrozenSet[str]`):
            Set of `refs` pointing to this revision. Empty if the revision is detached.
            Example: `{"main", "2.4.0"}` or `{"refs/pr/1"}`.
        size_on_disk (`int`):
            Sum of the blob file sizes that are symlink-ed by the revision.
        last_modified (`float`):
            Timestamp of the last time the revision has been created/modified.

    <Tip warning={true}>

    `last_accessed` cannot be determined correctly on a single revision as blob files
    are shared across revisions.

    </Tip>

    <Tip warning={true}>

    `size_on_disk` is not necessarily the sum of all file sizes because of possible
    duplicated files. Besides, only blobs are taken into account, not the (negligible)
    size of folders and symlinks.

    </Tip>
    """

    commit_hash: str
    snapshot_path: Path
    size_on_disk: int
    files: FrozenSet[CachedFileInfo]
    refs: FrozenSet[str]

    last_modified: float

    @property
    def last_modified_str(self) -> str:
        """
        (property) Human-readable version of `last_modified`.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.last_modified)

    @property
    def size_on_disk_str(self) -> str:
        """
        (property) Human-readable version of `size_on_disk`.

        Example: "42.2K".
        """
        return _format_size(self.size_on_disk)

    @property
    def nb_files(self) -> int:
        """
        (property) Total number of files in the revision.
        """
        return len(self.files)
|
182 |
+
|
183 |
+
|
184 |
+
@dataclass(frozen=True)
class CachedRepoInfo:
    """Immutable description of one cached repository.

    Args:
        repo_id (`str`):
            Repo id of the repo on the Hub. Example: `"google/fleurs"`.
        repo_type (`Literal["dataset", "model", "space"]`):
            Type of the cached repo.
        repo_path (`Path`):
            Local path to the cached repo.
        size_on_disk (`int`):
            Sum of the blob file sizes in the cached repo.
        nb_files (`int`):
            Total number of blob files in the cached repo.
        revisions (`FrozenSet[CachedRevisionInfo]`):
            Set of [`~CachedRevisionInfo`] describing all revisions cached in the repo.
        last_accessed (`float`):
            Timestamp of the last time a blob file of the repo has been accessed.
        last_modified (`float`):
            Timestamp of the last time a blob file of the repo has been modified/created.

    <Tip warning={true}>

    `size_on_disk` is not necessarily the sum of all revisions sizes because of
    duplicated files. Besides, only blobs are taken into account, not the (negligible)
    size of folders and symlinks.

    </Tip>

    <Tip warning={true}>

    `last_accessed` and `last_modified` reliability can depend on the OS you are using.
    See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
    for more details.

    </Tip>
    """

    repo_id: str
    repo_type: REPO_TYPE_T
    repo_path: Path
    size_on_disk: int
    nb_files: int
    revisions: FrozenSet[CachedRevisionInfo]

    last_accessed: float
    last_modified: float

    @property
    def last_accessed_str(self) -> str:
        """
        (property) Human-readable version of `last_accessed`.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.last_accessed)

    @property
    def last_modified_str(self) -> str:
        """
        (property) Human-readable version of `last_modified`.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.last_modified)

    @property
    def size_on_disk_str(self) -> str:
        """
        (property) Human-readable version of `size_on_disk`.

        Example: "42.2K".
        """
        return _format_size(self.size_on_disk)

    @property
    def refs(self) -> Dict[str, CachedRevisionInfo]:
        """
        (property) Mapping between `refs` and revision data structures.
        """
        mapping: Dict[str, CachedRevisionInfo] = {}
        for revision in self.revisions:
            for ref in revision.refs:
                mapping[ref] = revision
        return mapping
|
268 |
+
|
269 |
+
|
270 |
+
@dataclass(frozen=True)
class DeleteCacheStrategy:
    """Immutable plan describing which cache entries to delete.

    This object is not meant to be instantiated programmatically but to be returned by
    [`~utils.HFCacheInfo.delete_revisions`]. See documentation for usage example.

    Args:
        expected_freed_size (`float`):
            Expected freed size once strategy is executed.
        blobs (`FrozenSet[Path]`):
            Set of blob file paths to be deleted.
        refs (`FrozenSet[Path]`):
            Set of reference file paths to be deleted.
        repos (`FrozenSet[Path]`):
            Set of entire repo paths to be deleted.
        snapshots (`FrozenSet[Path]`):
            Set of snapshots to be deleted (directory of symlinks).
    """

    expected_freed_size: int
    blobs: FrozenSet[Path]
    refs: FrozenSet[Path]
    repos: FrozenSet[Path]
    snapshots: FrozenSet[Path]

    @property
    def expected_freed_size_str(self) -> str:
        """
        (property) Human-readable version of `expected_freed_size`.

        Example: "42.2K".
        """
        return _format_size(self.expected_freed_size)

    def execute(self) -> None:
        """Execute the defined strategy.

        <Tip warning={true}>

        If this method is interrupted, the cache might get corrupted. Deletion order is
        implemented so that references and symlinks are deleted before the actual blob
        files.

        </Tip>

        <Tip warning={true}>

        This method is irreversible. If executed, cached files are erased and must be
        downloaded again.

        </Tip>
        """
        # Deletion order matters: blobs go last so that an interruption can never
        # leave a `ref` pointing at a missing snapshot, or a snapshot symlink
        # pointing at a deleted blob.
        for repo_path in self.repos:
            _try_delete_path(repo_path, path_type="repo")

        for snapshot_path in self.snapshots:
            _try_delete_path(snapshot_path, path_type="snapshot")

        for ref_path in self.refs:
            _try_delete_path(ref_path, path_type="ref")

        for blob_path in self.blobs:
            _try_delete_path(blob_path, path_type="blob")

        logger.info(f"Cache deletion done. Saved {self.expected_freed_size_str}.")
|
344 |
+
|
345 |
+
|
346 |
+
@dataclass(frozen=True)
class HFCacheInfo:
    """Frozen data structure holding information about the entire cache-system.

    This data structure is returned by [`scan_cache_dir`] and is immutable.

    Args:
        size_on_disk (`int`):
            Sum of all valid repo sizes in the cache-system.
        repos (`FrozenSet[CachedRepoInfo]`):
            Set of [`~CachedRepoInfo`] describing all valid cached repos found on the
            cache-system while scanning.
        warnings (`List[CorruptedCacheException]`):
            List of [`~CorruptedCacheException`] that occurred while scanning the cache.
            Those exceptions are captured so that the scan can continue. Corrupted repos
            are skipped from the scan.

    <Tip warning={true}>

    Here `size_on_disk` is equal to the sum of all repo sizes (only blobs). However if
    some cached repos are corrupted, their sizes are not taken into account.

    </Tip>
    """

    size_on_disk: int
    repos: FrozenSet[CachedRepoInfo]
    warnings: List[CorruptedCacheException]

    @property
    def size_on_disk_str(self) -> str:
        """
        (property) Sum of all valid repo sizes in the cache-system as a human-readable
        string.

        Example: "42.2K".
        """
        return _format_size(self.size_on_disk)

    def delete_revisions(self, *revisions: str) -> DeleteCacheStrategy:
        """Prepare the strategy to delete one or more revisions cached locally.

        Input revisions can be any revision hash. If a revision hash is not found in the
        local cache, a warning is thrown but no error is raised. Revisions can be from
        different cached repos since hashes are unique across repos.

        Examples:
        ```py
        >>> from huggingface_hub import scan_cache_dir
        >>> cache_info = scan_cache_dir()
        >>> delete_strategy = cache_info.delete_revisions(
        ...     "81fd1d6e7847c99f5862c9fb81387956d99ec7aa"
        ... )
        >>> print(f"Will free {delete_strategy.expected_freed_size_str}.")
        Will free 7.9K.
        >>> delete_strategy.execute()
        Cache deletion done. Saved 7.9K.
        ```

        ```py
        >>> from huggingface_hub import scan_cache_dir
        >>> scan_cache_dir().delete_revisions(
        ...     "81fd1d6e7847c99f5862c9fb81387956d99ec7aa",
        ...     "e2983b237dccf3ab4937c97fa717319a9ca1a96d",
        ...     "6c0e6080953db56375760c0471a8c5f2929baf11",
        ... ).execute()
        Cache deletion done. Saved 8.6G.
        ```

        <Tip warning={true}>

        `delete_revisions` returns a [`~utils.DeleteCacheStrategy`] object that needs to
        be executed. The [`~utils.DeleteCacheStrategy`] is not meant to be modified but
        allows having a dry run before actually executing the deletion.

        </Tip>
        """
        hashes_to_delete: Set[str] = set(revisions)

        repos_with_revisions: Dict[CachedRepoInfo, Set[CachedRevisionInfo]] = defaultdict(set)

        # Map each requested hash to the cached revision(s) carrying it.
        for repo in self.repos:
            for revision in repo.revisions:
                if revision.commit_hash in hashes_to_delete:
                    repos_with_revisions[repo].add(revision)
                    hashes_to_delete.remove(revision.commit_hash)

        if len(hashes_to_delete) > 0:
            logger.warning(f"Revision(s) not found - cannot delete them: {', '.join(hashes_to_delete)}")

        delete_strategy_blobs: Set[Path] = set()
        delete_strategy_refs: Set[Path] = set()
        delete_strategy_repos: Set[Path] = set()
        delete_strategy_snapshots: Set[Path] = set()
        delete_strategy_expected_freed_size = 0

        for affected_repo, revisions_to_delete in repos_with_revisions.items():
            other_revisions = affected_repo.revisions - revisions_to_delete

            # If no other revisions, it means all revisions are deleted
            # -> delete the entire cached repo
            if len(other_revisions) == 0:
                delete_strategy_repos.add(affected_repo.repo_path)
                delete_strategy_expected_freed_size += affected_repo.size_on_disk
                continue

            # Blobs still referenced by the revisions that are kept. Built once per
            # repo so that the "is this blob still needed?" check below is an O(1)
            # membership test instead of re-scanning every remaining revision's
            # files for every file to delete (previously O(n^2)-ish).
            kept_blobs = {file.blob_path for revision in other_revisions for file in revision.files}

            # Some revisions of the repo will be deleted but not all. We need to filter
            # which blob files will not be linked anymore.
            for revision_to_delete in revisions_to_delete:
                # Snapshot dir
                delete_strategy_snapshots.add(revision_to_delete.snapshot_path)

                # Refs dir
                for ref in revision_to_delete.refs:
                    delete_strategy_refs.add(affected_repo.repo_path / "refs" / ref)

                # Blobs dir: a blob is deleted only if no remaining revision links it.
                for file in revision_to_delete.files:
                    if file.blob_path not in delete_strategy_blobs and file.blob_path not in kept_blobs:
                        delete_strategy_blobs.add(file.blob_path)
                        delete_strategy_expected_freed_size += file.size_on_disk

        # Return the strategy instead of executing it.
        return DeleteCacheStrategy(
            blobs=frozenset(delete_strategy_blobs),
            refs=frozenset(delete_strategy_refs),
            repos=frozenset(delete_strategy_repos),
            snapshots=frozenset(delete_strategy_snapshots),
            expected_freed_size=delete_strategy_expected_freed_size,
        )

    def export_as_table(self, *, verbosity: int = 0) -> str:
        """Generate a table from the [`HFCacheInfo`] object.

        Pass `verbosity=0` to get a table with a single row per repo, with columns
        "repo_id", "repo_type", "size_on_disk", "nb_files", "last_accessed", "last_modified", "refs", "local_path".

        Pass `verbosity=1` to get a table with a row per repo and revision (thus multiple rows can appear for a single repo), with columns
        "repo_id", "repo_type", "revision", "size_on_disk", "nb_files", "last_modified", "refs", "local_path".

        Example:
        ```py
        >>> from huggingface_hub.utils import scan_cache_dir

        >>> hf_cache_info = scan_cache_dir()
        HFCacheInfo(...)

        >>> print(hf_cache_info.export_as_table())
        REPO ID      REPO TYPE SIZE ON DISK NB FILES LAST_ACCESSED LAST_MODIFIED REFS LOCAL PATH
        ------------ --------- ------------ -------- ------------- ------------- ---- ---------------------------------------------
        roberta-base model             2.7M        5 1 day ago     1 week ago    main ~/.cache/huggingface/hub/models--roberta-base
        t5-base      model           893.8M        4 4 days ago    7 months ago  main ~/.cache/huggingface/hub/models--t5-base
        ```

        Args:
            verbosity (`int`, *optional*):
                The verbosity level. Defaults to 0.

        Returns:
            `str`: The table as a string.
        """
        if verbosity == 0:
            return tabulate(
                rows=[
                    [
                        repo.repo_id,
                        repo.repo_type,
                        "{:>12}".format(repo.size_on_disk_str),
                        repo.nb_files,
                        repo.last_accessed_str,
                        repo.last_modified_str,
                        ", ".join(sorted(repo.refs)),
                        str(repo.repo_path),
                    ]
                    for repo in sorted(self.repos, key=lambda repo: repo.repo_path)
                ],
                headers=[
                    "REPO ID",
                    "REPO TYPE",
                    "SIZE ON DISK",
                    "NB FILES",
                    "LAST_ACCESSED",
                    "LAST_MODIFIED",
                    "REFS",
                    "LOCAL PATH",
                ],
            )
        else:
            return tabulate(
                rows=[
                    [
                        repo.repo_id,
                        repo.repo_type,
                        revision.commit_hash,
                        "{:>12}".format(revision.size_on_disk_str),
                        revision.nb_files,
                        revision.last_modified_str,
                        ", ".join(sorted(revision.refs)),
                        str(revision.snapshot_path),
                    ]
                    for repo in sorted(self.repos, key=lambda repo: repo.repo_path)
                    for revision in sorted(repo.revisions, key=lambda revision: revision.commit_hash)
                ],
                headers=[
                    "REPO ID",
                    "REPO TYPE",
                    "REVISION",
                    "SIZE ON DISK",
                    "NB FILES",
                    "LAST_MODIFIED",
                    "REFS",
                    "LOCAL PATH",
                ],
            )
|
581 |
+
|
582 |
+
|
583 |
+
def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
    """Scan the entire HF cache-system and return a [`~HFCacheInfo`] structure.

    Use `scan_cache_dir` in order to programmatically scan your cache-system. The cache
    is scanned repo by repo. If a repo is corrupted, a [`~CorruptedCacheException`] is
    raised internally but captured and returned in the [`~HFCacheInfo`] structure. Only
    valid repos get a proper report.

    ```py
    >>> from huggingface_hub import scan_cache_dir

    >>> hf_cache_info = scan_cache_dir()
    HFCacheInfo(
        size_on_disk=3398085269,
        repos=frozenset({
            CachedRepoInfo(
                repo_id='t5-small',
                repo_type='model',
                repo_path=PosixPath(...),
                size_on_disk=970726914,
                nb_files=11,
                revisions=frozenset({CachedRevisionInfo(...), ...}),
            ),
            CachedRepoInfo(...),
            ...
        }),
        warnings=[
            CorruptedCacheException("Snapshots dir doesn't exist in cached repo: ..."),
            ...
        ],
    )
    ```

    You can also print a detailed report directly from the `huggingface-cli` using
    `huggingface-cli scan-cache`.

    Args:
        cache_dir (`str` or `Path`, `optional`):
            Cache directory to cache. Defaults to the default HF cache directory.

    <Tip warning={true}>

    Raises:

    `CacheNotFound`
        If the cache directory does not exist.

    [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
        If the cache directory is a file, instead of a directory.

    </Tip>

    Returns: a [`~HFCacheInfo`] object.
    """
    cache_dir = Path(HF_HUB_CACHE if cache_dir is None else cache_dir).expanduser().resolve()

    if not cache_dir.exists():
        raise CacheNotFound(
            f"Cache directory not found: {cache_dir}. Please use `cache_dir` argument or set `HF_HUB_CACHE` environment variable.",
            cache_dir=cache_dir,
        )
    if cache_dir.is_file():
        raise ValueError(
            f"Scan cache expects a directory but found a file: {cache_dir}. Please use `cache_dir` argument or set `HF_HUB_CACHE` environment variable."
        )

    scanned_repos: Set[CachedRepoInfo] = set()
    scan_warnings: List[CorruptedCacheException] = []
    for repo_path in cache_dir.iterdir():
        # './.locks/' holds lock files, not a cached repo -> skip it.
        if repo_path.name == ".locks":
            continue
        try:
            scanned_repos.add(_scan_cached_repo(repo_path))
        except CorruptedCacheException as e:
            # Capture the error so scanning can continue; corrupted repos are reported
            # in `warnings` instead of aborting the whole scan.
            scan_warnings.append(e)

    return HFCacheInfo(
        repos=frozenset(scanned_repos),
        size_on_disk=sum(repo.size_on_disk for repo in scanned_repos),
        warnings=scan_warnings,
    )
|
699 |
+
|
700 |
+
|
701 |
+
def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
    """Scan a single cached repo folder and build a [`~CachedRepoInfo`] from it.

    Any unexpected layout raises a [`~CorruptedCacheException`].
    """
    if not repo_path.is_dir():
        raise CorruptedCacheException(f"Repo path is not a directory: {repo_path}")

    if "--" not in repo_path.name:
        raise CorruptedCacheException(f"Repo path is not a valid HuggingFace cache directory: {repo_path}")

    # Folder name encodes type and id: "models--google--fleurs" -> ("model", "google/fleurs")
    repo_type, repo_id = repo_path.name.split("--", maxsplit=1)
    repo_type = repo_type[:-1]  # drop plural "s" ("models" -> "model")
    repo_id = repo_id.replace("--", "/")

    if repo_type not in {"dataset", "model", "space"}:
        raise CorruptedCacheException(
            f"Repo type must be `dataset`, `model` or `space`, found `{repo_type}` ({repo_path})."
        )

    # Stat each blob exactly once; keyed by resolved blob path.
    blob_stats: Dict[Path, os.stat_result] = {}

    snapshots_path = repo_path / "snapshots"
    refs_path = repo_path / "refs"

    if not snapshots_path.exists() or not snapshots_path.is_dir():
        raise CorruptedCacheException(f"Snapshots dir doesn't exist in cached repo: {snapshots_path}")

    # Scan over `refs` directory: map each commit hash to the set of refs pointing to it.
    #
    # Example of `refs` directory
    # ── refs
    #    ├── main
    #    └── refs
    #        └── pr
    #            └── 1
    refs_by_hash: Dict[str, Set[str]] = defaultdict(set)
    if refs_path.exists():
        if refs_path.is_file():
            raise CorruptedCacheException(f"Refs directory cannot be a file: {refs_path}")

        for ref_path in refs_path.glob("**/*"):
            # glob("**/*") yields directories too (e.g. "refs/pr") -> keep only ref files.
            if ref_path.is_dir() or ref_path.name in FILES_TO_IGNORE:
                continue

            ref_name = str(ref_path.relative_to(refs_path))
            with ref_path.open() as f:
                commit_hash = f.read()

            refs_by_hash[commit_hash].add(ref_name)

    # Scan snapshots directory: one sub-folder per cached revision.
    cached_revisions: Set[CachedRevisionInfo] = set()
    for revision_path in snapshots_path.iterdir():
        # Ignore OS-created helper files (e.g. ".DS_Store")
        if revision_path.name in FILES_TO_IGNORE:
            continue
        if revision_path.is_file():
            raise CorruptedCacheException(f"Snapshots folder corrupted. Found a file: {revision_path}")

        cached_files = set()
        for file_path in revision_path.glob("**/*"):
            # glob("**/*") yields directories too -> skip them.
            if file_path.is_dir():
                continue

            # Each snapshot file is a symlink to a blob; resolve it.
            blob_path = Path(file_path).resolve()
            if not blob_path.exists():
                raise CorruptedCacheException(f"Blob missing (broken symlink): {blob_path}")

            if blob_path not in blob_stats:
                blob_stats[blob_path] = blob_path.stat()

            cached_files.add(
                CachedFileInfo(
                    file_name=file_path.name,
                    file_path=file_path,
                    size_on_disk=blob_stats[blob_path].st_size,
                    blob_path=blob_path,
                    blob_last_accessed=blob_stats[blob_path].st_atime,
                    blob_last_modified=blob_stats[blob_path].st_mtime,
                )
            )

        # Revision timestamp: newest blob wins; an empty revision falls back to the
        # snapshot folder's own mtime.
        if cached_files:
            revision_last_modified = max(blob_stats[file.blob_path].st_mtime for file in cached_files)
        else:
            revision_last_modified = revision_path.stat().st_mtime

        cached_revisions.add(
            CachedRevisionInfo(
                commit_hash=revision_path.name,
                files=frozenset(cached_files),
                refs=frozenset(refs_by_hash.pop(revision_path.name, set())),
                # Deduplicate blob paths: several files may symlink the same blob.
                size_on_disk=sum(
                    blob_stats[blob_path].st_size for blob_path in {file.blob_path for file in cached_files}
                ),
                snapshot_path=revision_path,
                last_modified=revision_last_modified,
            )
        )

    # Any ref left over points to a snapshot folder that does not exist -> corrupted.
    if refs_by_hash:
        raise CorruptedCacheException(
            f"Reference(s) refer to missing commit hashes: {dict(refs_by_hash)} ({repo_path})."
        )

    # Repo-level timestamps: newest blob wins; fall back to the repo folder itself
    # when no blob has been found. Same for last accessed.
    if blob_stats:
        repo_last_accessed = max(stat.st_atime for stat in blob_stats.values())
        repo_last_modified = max(stat.st_mtime for stat in blob_stats.values())
    else:
        repo_stats = repo_path.stat()
        repo_last_accessed = repo_stats.st_atime
        repo_last_modified = repo_stats.st_mtime

    # Build and return frozen structure
    return CachedRepoInfo(
        nb_files=len(blob_stats),
        repo_id=repo_id,
        repo_path=repo_path,
        repo_type=repo_type,  # type: ignore
        revisions=frozenset(cached_revisions),
        size_on_disk=sum(stat.st_size for stat in blob_stats.values()),
        last_accessed=repo_last_accessed,
        last_modified=repo_last_modified,
    )
|
834 |
+
|
835 |
+
|
836 |
+
def _format_size(num: int) -> str:
|
837 |
+
"""Format size in bytes into a human-readable string.
|
838 |
+
|
839 |
+
Taken from https://stackoverflow.com/a/1094933
|
840 |
+
"""
|
841 |
+
num_f = float(num)
|
842 |
+
for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
|
843 |
+
if abs(num_f) < 1000.0:
|
844 |
+
return f"{num_f:3.1f}{unit}"
|
845 |
+
num_f /= 1000.0
|
846 |
+
return f"{num_f:.1f}Y"
|
847 |
+
|
848 |
+
|
849 |
+
_TIMESINCE_CHUNKS = (
|
850 |
+
# Label, divider, max value
|
851 |
+
("second", 1, 60),
|
852 |
+
("minute", 60, 60),
|
853 |
+
("hour", 60 * 60, 24),
|
854 |
+
("day", 60 * 60 * 24, 6),
|
855 |
+
("week", 60 * 60 * 24 * 7, 6),
|
856 |
+
("month", 60 * 60 * 24 * 30, 11),
|
857 |
+
("year", 60 * 60 * 24 * 365, None),
|
858 |
+
)
|
859 |
+
|
860 |
+
|
861 |
+
def _format_timesince(ts: float) -> str:
|
862 |
+
"""Format timestamp in seconds into a human-readable string, relative to now.
|
863 |
+
|
864 |
+
Vaguely inspired by Django's `timesince` formatter.
|
865 |
+
"""
|
866 |
+
delta = time.time() - ts
|
867 |
+
if delta < 20:
|
868 |
+
return "a few seconds ago"
|
869 |
+
for label, divider, max_value in _TIMESINCE_CHUNKS: # noqa: B007
|
870 |
+
value = round(delta / divider)
|
871 |
+
if max_value is not None and value <= max_value:
|
872 |
+
break
|
873 |
+
return f"{value} {label}{'s' if value > 1 else ''} ago"
|
874 |
+
|
875 |
+
|
876 |
+
def _try_delete_path(path: Path, path_type: str) -> None:
|
877 |
+
"""Try to delete a local file or folder.
|
878 |
+
|
879 |
+
If the path does not exists, error is logged as a warning and then ignored.
|
880 |
+
|
881 |
+
Args:
|
882 |
+
path (`Path`)
|
883 |
+
Path to delete. Can be a file or a folder.
|
884 |
+
path_type (`str`)
|
885 |
+
What path are we deleting ? Only for logging purposes. Example: "snapshot".
|
886 |
+
"""
|
887 |
+
logger.info(f"Delete {path_type}: {path}")
|
888 |
+
try:
|
889 |
+
if path.is_file():
|
890 |
+
os.remove(path)
|
891 |
+
else:
|
892 |
+
shutil.rmtree(path)
|
893 |
+
except FileNotFoundError:
|
894 |
+
logger.warning(f"Couldn't delete {path_type}: file not found ({path})", exc_info=True)
|
895 |
+
except PermissionError:
|
896 |
+
logger.warning(f"Couldn't delete {path_type}: permission denied ({path})", exc_info=True)
|
venv/lib/python3.12/site-packages/huggingface_hub/utils/_datetime.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contains utilities to handle datetimes in Huggingface Hub."""
|
16 |
+
|
17 |
+
from datetime import datetime, timezone
|
18 |
+
|
19 |
+
|
20 |
+
def parse_datetime(date_string: str) -> datetime:
|
21 |
+
"""
|
22 |
+
Parses a date_string returned from the server to a datetime object.
|
23 |
+
|
24 |
+
This parser is a weak-parser is the sense that it handles only a single format of
|
25 |
+
date_string. It is expected that the server format will never change. The
|
26 |
+
implementation depends only on the standard lib to avoid an external dependency
|
27 |
+
(python-dateutil). See full discussion about this decision on PR:
|
28 |
+
https://github.com/huggingface/huggingface_hub/pull/999.
|
29 |
+
|
30 |
+
Example:
|
31 |
+
```py
|
32 |
+
> parse_datetime('2022-08-19T07:19:38.123Z')
|
33 |
+
datetime.datetime(2022, 8, 19, 7, 19, 38, 123000, tzinfo=timezone.utc)
|
34 |
+
```
|
35 |
+
|
36 |
+
Args:
|
37 |
+
date_string (`str`):
|
38 |
+
A string representing a datetime returned by the Hub server.
|
39 |
+
String is expected to follow '%Y-%m-%dT%H:%M:%S.%fZ' pattern.
|
40 |
+
|
41 |
+
Returns:
|
42 |
+
A python datetime object.
|
43 |
+
|
44 |
+
Raises:
|
45 |
+
:class:`ValueError`:
|
46 |
+
If `date_string` cannot be parsed.
|
47 |
+
"""
|
48 |
+
try:
|
49 |
+
# Normalize the string to always have 6 digits of fractional seconds
|
50 |
+
if date_string.endswith("Z"):
|
51 |
+
# Case 1: No decimal point (e.g., "2024-11-16T00:27:02Z")
|
52 |
+
if "." not in date_string:
|
53 |
+
# No fractional seconds - insert .000000
|
54 |
+
date_string = date_string[:-1] + ".000000Z"
|
55 |
+
# Case 2: Has decimal point (e.g., "2022-08-19T07:19:38.123456789Z")
|
56 |
+
else:
|
57 |
+
# Get the fractional and base parts
|
58 |
+
base, fraction = date_string[:-1].split(".")
|
59 |
+
# fraction[:6] takes first 6 digits and :0<6 pads with zeros if less than 6 digits
|
60 |
+
date_string = f"{base}.{fraction[:6]:0<6}Z"
|
61 |
+
|
62 |
+
return datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=timezone.utc)
|
63 |
+
except ValueError as e:
|
64 |
+
raise ValueError(
|
65 |
+
f"Cannot parse '{date_string}' as a datetime. Date string is expected to"
|
66 |
+
" follow '%Y-%m-%dT%H:%M:%S.%fZ' pattern."
|
67 |
+
) from e
|
venv/lib/python3.12/site-packages/huggingface_hub/utils/_deprecation.py
ADDED
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import warnings
|
2 |
+
from functools import wraps
|
3 |
+
from inspect import Parameter, signature
|
4 |
+
from typing import Iterable, Optional
|
5 |
+
|
6 |
+
|
7 |
+
def _deprecate_positional_args(*, version: str):
|
8 |
+
"""Decorator for methods that issues warnings for positional arguments.
|
9 |
+
Using the keyword-only argument syntax in pep 3102, arguments after the
|
10 |
+
* will issue a warning when passed as a positional argument.
|
11 |
+
|
12 |
+
Args:
|
13 |
+
version (`str`):
|
14 |
+
The version when positional arguments will result in error.
|
15 |
+
"""
|
16 |
+
|
17 |
+
def _inner_deprecate_positional_args(f):
|
18 |
+
sig = signature(f)
|
19 |
+
kwonly_args = []
|
20 |
+
all_args = []
|
21 |
+
for name, param in sig.parameters.items():
|
22 |
+
if param.kind == Parameter.POSITIONAL_OR_KEYWORD:
|
23 |
+
all_args.append(name)
|
24 |
+
elif param.kind == Parameter.KEYWORD_ONLY:
|
25 |
+
kwonly_args.append(name)
|
26 |
+
|
27 |
+
@wraps(f)
|
28 |
+
def inner_f(*args, **kwargs):
|
29 |
+
extra_args = len(args) - len(all_args)
|
30 |
+
if extra_args <= 0:
|
31 |
+
return f(*args, **kwargs)
|
32 |
+
# extra_args > 0
|
33 |
+
args_msg = [
|
34 |
+
f"{name}='{arg}'" if isinstance(arg, str) else f"{name}={arg}"
|
35 |
+
for name, arg in zip(kwonly_args[:extra_args], args[-extra_args:])
|
36 |
+
]
|
37 |
+
args_msg = ", ".join(args_msg)
|
38 |
+
warnings.warn(
|
39 |
+
f"Deprecated positional argument(s) used in '{f.__name__}': pass"
|
40 |
+
f" {args_msg} as keyword args. From version {version} passing these"
|
41 |
+
" as positional arguments will result in an error,",
|
42 |
+
FutureWarning,
|
43 |
+
)
|
44 |
+
kwargs.update(zip(sig.parameters, args))
|
45 |
+
return f(**kwargs)
|
46 |
+
|
47 |
+
return inner_f
|
48 |
+
|
49 |
+
return _inner_deprecate_positional_args
|
50 |
+
|
51 |
+
|
52 |
+
def _deprecate_arguments(
|
53 |
+
*,
|
54 |
+
version: str,
|
55 |
+
deprecated_args: Iterable[str],
|
56 |
+
custom_message: Optional[str] = None,
|
57 |
+
):
|
58 |
+
"""Decorator to issue warnings when using deprecated arguments.
|
59 |
+
|
60 |
+
TODO: could be useful to be able to set a custom error message.
|
61 |
+
|
62 |
+
Args:
|
63 |
+
version (`str`):
|
64 |
+
The version when deprecated arguments will result in error.
|
65 |
+
deprecated_args (`List[str]`):
|
66 |
+
List of the arguments to be deprecated.
|
67 |
+
custom_message (`str`, *optional*):
|
68 |
+
Warning message that is raised. If not passed, a default warning message
|
69 |
+
will be created.
|
70 |
+
"""
|
71 |
+
|
72 |
+
def _inner_deprecate_positional_args(f):
|
73 |
+
sig = signature(f)
|
74 |
+
|
75 |
+
@wraps(f)
|
76 |
+
def inner_f(*args, **kwargs):
|
77 |
+
# Check for used deprecated arguments
|
78 |
+
used_deprecated_args = []
|
79 |
+
for _, parameter in zip(args, sig.parameters.values()):
|
80 |
+
if parameter.name in deprecated_args:
|
81 |
+
used_deprecated_args.append(parameter.name)
|
82 |
+
for kwarg_name, kwarg_value in kwargs.items():
|
83 |
+
if (
|
84 |
+
# If argument is deprecated but still used
|
85 |
+
kwarg_name in deprecated_args
|
86 |
+
# And then the value is not the default value
|
87 |
+
and kwarg_value != sig.parameters[kwarg_name].default
|
88 |
+
):
|
89 |
+
used_deprecated_args.append(kwarg_name)
|
90 |
+
|
91 |
+
# Warn and proceed
|
92 |
+
if len(used_deprecated_args) > 0:
|
93 |
+
message = (
|
94 |
+
f"Deprecated argument(s) used in '{f.__name__}':"
|
95 |
+
f" {', '.join(used_deprecated_args)}. Will not be supported from"
|
96 |
+
f" version '{version}'."
|
97 |
+
)
|
98 |
+
if custom_message is not None:
|
99 |
+
message += "\n\n" + custom_message
|
100 |
+
warnings.warn(message, FutureWarning)
|
101 |
+
return f(*args, **kwargs)
|
102 |
+
|
103 |
+
return inner_f
|
104 |
+
|
105 |
+
return _inner_deprecate_positional_args
|
106 |
+
|
107 |
+
|
108 |
+
def _deprecate_method(*, version: str, message: Optional[str] = None):
|
109 |
+
"""Decorator to issue warnings when using a deprecated method.
|
110 |
+
|
111 |
+
Args:
|
112 |
+
version (`str`):
|
113 |
+
The version when deprecated arguments will result in error.
|
114 |
+
message (`str`, *optional*):
|
115 |
+
Warning message that is raised. If not passed, a default warning message
|
116 |
+
will be created.
|
117 |
+
"""
|
118 |
+
|
119 |
+
def _inner_deprecate_method(f):
|
120 |
+
name = f.__name__
|
121 |
+
if name == "__init__":
|
122 |
+
name = f.__qualname__.split(".")[0] # class name instead of method name
|
123 |
+
|
124 |
+
@wraps(f)
|
125 |
+
def inner_f(*args, **kwargs):
|
126 |
+
warning_message = (
|
127 |
+
f"'{name}' (from '{f.__module__}') is deprecated and will be removed from version '{version}'."
|
128 |
+
)
|
129 |
+
if message is not None:
|
130 |
+
warning_message += " " + message
|
131 |
+
warnings.warn(warning_message, FutureWarning)
|
132 |
+
return f(*args, **kwargs)
|
133 |
+
|
134 |
+
return inner_f
|
135 |
+
|
136 |
+
return _inner_deprecate_method
|
venv/lib/python3.12/site-packages/huggingface_hub/utils/_experimental.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2023-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contains utilities to flag a feature as "experimental" in Huggingface Hub."""
|
16 |
+
|
17 |
+
import warnings
|
18 |
+
from functools import wraps
|
19 |
+
from typing import Callable
|
20 |
+
|
21 |
+
from .. import constants
|
22 |
+
|
23 |
+
|
24 |
+
def experimental(fn: Callable) -> Callable:
|
25 |
+
"""Decorator to flag a feature as experimental.
|
26 |
+
|
27 |
+
An experimental feature triggers a warning when used as it might be subject to breaking changes without prior notice
|
28 |
+
in the future.
|
29 |
+
|
30 |
+
Warnings can be disabled by setting `HF_HUB_DISABLE_EXPERIMENTAL_WARNING=1` as environment variable.
|
31 |
+
|
32 |
+
Args:
|
33 |
+
fn (`Callable`):
|
34 |
+
The function to flag as experimental.
|
35 |
+
|
36 |
+
Returns:
|
37 |
+
`Callable`: The decorated function.
|
38 |
+
|
39 |
+
Example:
|
40 |
+
|
41 |
+
```python
|
42 |
+
>>> from huggingface_hub.utils import experimental
|
43 |
+
|
44 |
+
>>> @experimental
|
45 |
+
... def my_function():
|
46 |
+
... print("Hello world!")
|
47 |
+
|
48 |
+
>>> my_function()
|
49 |
+
UserWarning: 'my_function' is experimental and might be subject to breaking changes in the future without prior
|
50 |
+
notice. You can disable this warning by setting `HF_HUB_DISABLE_EXPERIMENTAL_WARNING=1` as environment variable.
|
51 |
+
Hello world!
|
52 |
+
```
|
53 |
+
"""
|
54 |
+
# For classes, put the "experimental" around the "__new__" method => __new__ will be removed in warning message
|
55 |
+
name = fn.__qualname__[: -len(".__new__")] if fn.__qualname__.endswith(".__new__") else fn.__qualname__
|
56 |
+
|
57 |
+
@wraps(fn)
|
58 |
+
def _inner_fn(*args, **kwargs):
|
59 |
+
if not constants.HF_HUB_DISABLE_EXPERIMENTAL_WARNING:
|
60 |
+
warnings.warn(
|
61 |
+
f"'{name}' is experimental and might be subject to breaking changes in the future without prior notice."
|
62 |
+
" You can disable this warning by setting `HF_HUB_DISABLE_EXPERIMENTAL_WARNING=1` as environment"
|
63 |
+
" variable.",
|
64 |
+
UserWarning,
|
65 |
+
)
|
66 |
+
return fn(*args, **kwargs)
|
67 |
+
|
68 |
+
return _inner_fn
|
venv/lib/python3.12/site-packages/huggingface_hub/utils/_fixes.py
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# JSONDecodeError was introduced in requests=2.27 released in 2022.
|
2 |
+
# This allows us to support older requests for users
|
3 |
+
# More information: https://github.com/psf/requests/pull/5856
|
4 |
+
try:
|
5 |
+
from requests import JSONDecodeError # type: ignore # noqa: F401
|
6 |
+
except ImportError:
|
7 |
+
try:
|
8 |
+
from simplejson import JSONDecodeError # type: ignore # noqa: F401
|
9 |
+
except ImportError:
|
10 |
+
from json import JSONDecodeError # type: ignore # noqa: F401
|
11 |
+
import contextlib
|
12 |
+
import os
|
13 |
+
import shutil
|
14 |
+
import stat
|
15 |
+
import tempfile
|
16 |
+
import time
|
17 |
+
from functools import partial
|
18 |
+
from pathlib import Path
|
19 |
+
from typing import Callable, Generator, Optional, Union
|
20 |
+
|
21 |
+
import yaml
|
22 |
+
from filelock import BaseFileLock, FileLock, SoftFileLock, Timeout
|
23 |
+
|
24 |
+
from .. import constants
|
25 |
+
from . import logging
|
26 |
+
|
27 |
+
|
28 |
+
logger = logging.get_logger(__name__)
|
29 |
+
|
30 |
+
# Wrap `yaml.dump` to set `allow_unicode=True` by default.
|
31 |
+
#
|
32 |
+
# Example:
|
33 |
+
# ```py
|
34 |
+
# >>> yaml.dump({"emoji": "👀", "some unicode": "日本か"})
|
35 |
+
# 'emoji: "\\U0001F440"\nsome unicode: "\\u65E5\\u672C\\u304B"\n'
|
36 |
+
#
|
37 |
+
# >>> yaml_dump({"emoji": "👀", "some unicode": "日本か"})
|
38 |
+
# 'emoji: "👀"\nsome unicode: "日本か"\n'
|
39 |
+
# ```
|
40 |
+
yaml_dump: Callable[..., str] = partial(yaml.dump, stream=None, allow_unicode=True) # type: ignore
|
41 |
+
|
42 |
+
|
43 |
+
@contextlib.contextmanager
|
44 |
+
def SoftTemporaryDirectory(
|
45 |
+
suffix: Optional[str] = None,
|
46 |
+
prefix: Optional[str] = None,
|
47 |
+
dir: Optional[Union[Path, str]] = None,
|
48 |
+
**kwargs,
|
49 |
+
) -> Generator[Path, None, None]:
|
50 |
+
"""
|
51 |
+
Context manager to create a temporary directory and safely delete it.
|
52 |
+
|
53 |
+
If tmp directory cannot be deleted normally, we set the WRITE permission and retry.
|
54 |
+
If cleanup still fails, we give up but don't raise an exception. This is equivalent
|
55 |
+
to `tempfile.TemporaryDirectory(..., ignore_cleanup_errors=True)` introduced in
|
56 |
+
Python 3.10.
|
57 |
+
|
58 |
+
See https://www.scivision.dev/python-tempfile-permission-error-windows/.
|
59 |
+
"""
|
60 |
+
tmpdir = tempfile.TemporaryDirectory(prefix=prefix, suffix=suffix, dir=dir, **kwargs)
|
61 |
+
yield Path(tmpdir.name).resolve()
|
62 |
+
|
63 |
+
try:
|
64 |
+
# First once with normal cleanup
|
65 |
+
shutil.rmtree(tmpdir.name)
|
66 |
+
except Exception:
|
67 |
+
# If failed, try to set write permission and retry
|
68 |
+
try:
|
69 |
+
shutil.rmtree(tmpdir.name, onerror=_set_write_permission_and_retry)
|
70 |
+
except Exception:
|
71 |
+
pass
|
72 |
+
|
73 |
+
# And finally, cleanup the tmpdir.
|
74 |
+
# If it fails again, give up but do not throw error
|
75 |
+
try:
|
76 |
+
tmpdir.cleanup()
|
77 |
+
except Exception:
|
78 |
+
pass
|
79 |
+
|
80 |
+
|
81 |
+
def _set_write_permission_and_retry(func, path, excinfo):
|
82 |
+
os.chmod(path, stat.S_IWRITE)
|
83 |
+
func(path)
|
84 |
+
|
85 |
+
|
86 |
+
@contextlib.contextmanager
|
87 |
+
def WeakFileLock(
|
88 |
+
lock_file: Union[str, Path], *, timeout: Optional[float] = None
|
89 |
+
) -> Generator[BaseFileLock, None, None]:
|
90 |
+
"""A filelock with some custom logic.
|
91 |
+
|
92 |
+
This filelock is weaker than the default filelock in that:
|
93 |
+
1. It won't raise an exception if release fails.
|
94 |
+
2. It will default to a SoftFileLock if the filesystem does not support flock.
|
95 |
+
|
96 |
+
An INFO log message is emitted every 10 seconds if the lock is not acquired immediately.
|
97 |
+
If a timeout is provided, a `filelock.Timeout` exception is raised if the lock is not acquired within the timeout.
|
98 |
+
"""
|
99 |
+
log_interval = constants.FILELOCK_LOG_EVERY_SECONDS
|
100 |
+
lock = FileLock(lock_file, timeout=log_interval)
|
101 |
+
start_time = time.time()
|
102 |
+
|
103 |
+
while True:
|
104 |
+
elapsed_time = time.time() - start_time
|
105 |
+
if timeout is not None and elapsed_time >= timeout:
|
106 |
+
raise Timeout(str(lock_file))
|
107 |
+
|
108 |
+
try:
|
109 |
+
lock.acquire(timeout=min(log_interval, timeout - elapsed_time) if timeout else log_interval)
|
110 |
+
except Timeout:
|
111 |
+
logger.info(
|
112 |
+
f"Still waiting to acquire lock on {lock_file} (elapsed: {time.time() - start_time:.1f} seconds)"
|
113 |
+
)
|
114 |
+
except NotImplementedError as e:
|
115 |
+
if "use SoftFileLock instead" in str(e):
|
116 |
+
logger.warning(
|
117 |
+
"FileSystem does not appear to support flock. Falling back to SoftFileLock for %s", lock_file
|
118 |
+
)
|
119 |
+
lock = SoftFileLock(lock_file, timeout=log_interval)
|
120 |
+
continue
|
121 |
+
else:
|
122 |
+
break
|
123 |
+
|
124 |
+
try:
|
125 |
+
yield lock
|
126 |
+
finally:
|
127 |
+
try:
|
128 |
+
lock.release()
|
129 |
+
except OSError:
|
130 |
+
try:
|
131 |
+
Path(lock_file).unlink()
|
132 |
+
except OSError:
|
133 |
+
pass
|
venv/lib/python3.12/site-packages/huggingface_hub/utils/_http.py
ADDED
@@ -0,0 +1,637 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contains utilities to handle HTTP requests in Huggingface Hub."""
|
16 |
+
|
17 |
+
import io
|
18 |
+
import os
|
19 |
+
import re
|
20 |
+
import threading
|
21 |
+
import time
|
22 |
+
import uuid
|
23 |
+
from functools import lru_cache
|
24 |
+
from http import HTTPStatus
|
25 |
+
from shlex import quote
|
26 |
+
from typing import Any, Callable, List, Optional, Tuple, Type, Union
|
27 |
+
|
28 |
+
import requests
|
29 |
+
from requests import HTTPError, Response
|
30 |
+
from requests.adapters import HTTPAdapter
|
31 |
+
from requests.models import PreparedRequest
|
32 |
+
|
33 |
+
from huggingface_hub.errors import OfflineModeIsEnabled
|
34 |
+
|
35 |
+
from .. import constants
|
36 |
+
from ..errors import (
|
37 |
+
BadRequestError,
|
38 |
+
DisabledRepoError,
|
39 |
+
EntryNotFoundError,
|
40 |
+
GatedRepoError,
|
41 |
+
HfHubHTTPError,
|
42 |
+
RepositoryNotFoundError,
|
43 |
+
RevisionNotFoundError,
|
44 |
+
)
|
45 |
+
from . import logging
|
46 |
+
from ._fixes import JSONDecodeError
|
47 |
+
from ._lfs import SliceFileObj
|
48 |
+
from ._typing import HTTP_METHOD_T
|
49 |
+
|
50 |
+
|
51 |
+
logger = logging.get_logger(__name__)
|
52 |
+
|
53 |
+
# Both headers are used by the Hub to debug failed requests.
|
54 |
+
# `X_AMZN_TRACE_ID` is better as it also works to debug on Cloudfront and ALB.
|
55 |
+
# If `X_AMZN_TRACE_ID` is set, the Hub will use it as well.
|
56 |
+
X_AMZN_TRACE_ID = "X-Amzn-Trace-Id"
|
57 |
+
X_REQUEST_ID = "x-request-id"
|
58 |
+
|
59 |
+
REPO_API_REGEX = re.compile(
|
60 |
+
r"""
|
61 |
+
# staging or production endpoint
|
62 |
+
^https://[^/]+
|
63 |
+
(
|
64 |
+
# on /api/repo_type/repo_id
|
65 |
+
/api/(models|datasets|spaces)/(.+)
|
66 |
+
|
|
67 |
+
# or /repo_id/resolve/revision/...
|
68 |
+
/(.+)/resolve/(.+)
|
69 |
+
)
|
70 |
+
""",
|
71 |
+
flags=re.VERBOSE,
|
72 |
+
)
|
73 |
+
|
74 |
+
|
75 |
+
class UniqueRequestIdAdapter(HTTPAdapter):
|
76 |
+
X_AMZN_TRACE_ID = "X-Amzn-Trace-Id"
|
77 |
+
|
78 |
+
def add_headers(self, request, **kwargs):
|
79 |
+
super().add_headers(request, **kwargs)
|
80 |
+
|
81 |
+
# Add random request ID => easier for server-side debug
|
82 |
+
if X_AMZN_TRACE_ID not in request.headers:
|
83 |
+
request.headers[X_AMZN_TRACE_ID] = request.headers.get(X_REQUEST_ID) or str(uuid.uuid4())
|
84 |
+
|
85 |
+
# Add debug log
|
86 |
+
has_token = len(str(request.headers.get("authorization", ""))) > 0
|
87 |
+
logger.debug(
|
88 |
+
f"Request {request.headers[X_AMZN_TRACE_ID]}: {request.method} {request.url} (authenticated: {has_token})"
|
89 |
+
)
|
90 |
+
|
91 |
+
def send(self, request: PreparedRequest, *args, **kwargs) -> Response:
|
92 |
+
"""Catch any RequestException to append request id to the error message for debugging."""
|
93 |
+
if constants.HF_DEBUG:
|
94 |
+
logger.debug(f"Send: {_curlify(request)}")
|
95 |
+
try:
|
96 |
+
return super().send(request, *args, **kwargs)
|
97 |
+
except requests.RequestException as e:
|
98 |
+
request_id = request.headers.get(X_AMZN_TRACE_ID)
|
99 |
+
if request_id is not None:
|
100 |
+
# Taken from https://stackoverflow.com/a/58270258
|
101 |
+
e.args = (*e.args, f"(Request ID: {request_id})")
|
102 |
+
raise
|
103 |
+
|
104 |
+
|
105 |
+
class OfflineAdapter(HTTPAdapter):
|
106 |
+
def send(self, request: PreparedRequest, *args, **kwargs) -> Response:
|
107 |
+
raise OfflineModeIsEnabled(
|
108 |
+
f"Cannot reach {request.url}: offline mode is enabled. To disable it, please unset the `HF_HUB_OFFLINE` environment variable."
|
109 |
+
)
|
110 |
+
|
111 |
+
|
112 |
+
def _default_backend_factory() -> requests.Session:
|
113 |
+
session = requests.Session()
|
114 |
+
if constants.HF_HUB_OFFLINE:
|
115 |
+
session.mount("http://", OfflineAdapter())
|
116 |
+
session.mount("https://", OfflineAdapter())
|
117 |
+
else:
|
118 |
+
session.mount("http://", UniqueRequestIdAdapter())
|
119 |
+
session.mount("https://", UniqueRequestIdAdapter())
|
120 |
+
return session
|
121 |
+
|
122 |
+
|
123 |
+
BACKEND_FACTORY_T = Callable[[], requests.Session]
|
124 |
+
_GLOBAL_BACKEND_FACTORY: BACKEND_FACTORY_T = _default_backend_factory
|
125 |
+
|
126 |
+
|
127 |
+
def configure_http_backend(backend_factory: BACKEND_FACTORY_T = _default_backend_factory) -> None:
|
128 |
+
"""
|
129 |
+
Configure the HTTP backend by providing a `backend_factory`. Any HTTP calls made by `huggingface_hub` will use a
|
130 |
+
Session object instantiated by this factory. This can be useful if you are running your scripts in a specific
|
131 |
+
environment requiring custom configuration (e.g. custom proxy or certifications).
|
132 |
+
|
133 |
+
Use [`get_session`] to get a configured Session. Since `requests.Session` is not guaranteed to be thread-safe,
|
134 |
+
`huggingface_hub` creates 1 Session instance per thread. They are all instantiated using the same `backend_factory`
|
135 |
+
set in [`configure_http_backend`]. A LRU cache is used to cache the created sessions (and connections) between
|
136 |
+
calls. Max size is 128 to avoid memory leaks if thousands of threads are spawned.
|
137 |
+
|
138 |
+
See [this issue](https://github.com/psf/requests/issues/2766) to know more about thread-safety in `requests`.
|
139 |
+
|
140 |
+
Example:
|
141 |
+
```py
|
142 |
+
import requests
|
143 |
+
from huggingface_hub import configure_http_backend, get_session
|
144 |
+
|
145 |
+
# Create a factory function that returns a Session with configured proxies
|
146 |
+
def backend_factory() -> requests.Session:
|
147 |
+
session = requests.Session()
|
148 |
+
session.proxies = {"http": "http://10.10.1.10:3128", "https": "https://10.10.1.11:1080"}
|
149 |
+
return session
|
150 |
+
|
151 |
+
# Set it as the default session factory
|
152 |
+
configure_http_backend(backend_factory=backend_factory)
|
153 |
+
|
154 |
+
# In practice, this is mostly done internally in `huggingface_hub`
|
155 |
+
session = get_session()
|
156 |
+
```
|
157 |
+
"""
|
158 |
+
global _GLOBAL_BACKEND_FACTORY
|
159 |
+
_GLOBAL_BACKEND_FACTORY = backend_factory
|
160 |
+
reset_sessions()
|
161 |
+
|
162 |
+
|
163 |
+
def get_session() -> requests.Session:
|
164 |
+
"""
|
165 |
+
Get a `requests.Session` object, using the session factory from the user.
|
166 |
+
|
167 |
+
Use [`get_session`] to get a configured Session. Since `requests.Session` is not guaranteed to be thread-safe,
|
168 |
+
`huggingface_hub` creates 1 Session instance per thread. They are all instantiated using the same `backend_factory`
|
169 |
+
set in [`configure_http_backend`]. A LRU cache is used to cache the created sessions (and connections) between
|
170 |
+
calls. Max size is 128 to avoid memory leaks if thousands of threads are spawned.
|
171 |
+
|
172 |
+
See [this issue](https://github.com/psf/requests/issues/2766) to know more about thread-safety in `requests`.
|
173 |
+
|
174 |
+
Example:
|
175 |
+
```py
|
176 |
+
import requests
|
177 |
+
from huggingface_hub import configure_http_backend, get_session
|
178 |
+
|
179 |
+
# Create a factory function that returns a Session with configured proxies
|
180 |
+
def backend_factory() -> requests.Session:
|
181 |
+
session = requests.Session()
|
182 |
+
session.proxies = {"http": "http://10.10.1.10:3128", "https": "https://10.10.1.11:1080"}
|
183 |
+
return session
|
184 |
+
|
185 |
+
# Set it as the default session factory
|
186 |
+
configure_http_backend(backend_factory=backend_factory)
|
187 |
+
|
188 |
+
# In practice, this is mostly done internally in `huggingface_hub`
|
189 |
+
session = get_session()
|
190 |
+
```
|
191 |
+
"""
|
192 |
+
return _get_session_from_cache(process_id=os.getpid(), thread_id=threading.get_ident())
|
193 |
+
|
194 |
+
|
195 |
+
def reset_sessions() -> None:
|
196 |
+
"""Reset the cache of sessions.
|
197 |
+
|
198 |
+
Mostly used internally when sessions are reconfigured or an SSLError is raised.
|
199 |
+
See [`configure_http_backend`] for more details.
|
200 |
+
"""
|
201 |
+
_get_session_from_cache.cache_clear()
|
202 |
+
|
203 |
+
|
204 |
+
@lru_cache
|
205 |
+
def _get_session_from_cache(process_id: int, thread_id: int) -> requests.Session:
|
206 |
+
"""
|
207 |
+
Create a new session per thread using global factory. Using LRU cache (maxsize 128) to avoid memory leaks when
|
208 |
+
using thousands of threads. Cache is cleared when `configure_http_backend` is called.
|
209 |
+
"""
|
210 |
+
return _GLOBAL_BACKEND_FACTORY()
|
211 |
+
|
212 |
+
|
213 |
+
def http_backoff(
    method: HTTP_METHOD_T,
    url: str,
    *,
    max_retries: int = 5,
    base_wait_time: float = 1,
    max_wait_time: float = 8,
    retry_on_exceptions: Union[Type[Exception], Tuple[Type[Exception], ...]] = (
        requests.Timeout,
        requests.ConnectionError,
    ),
    retry_on_status_codes: Union[int, Tuple[int, ...]] = HTTPStatus.SERVICE_UNAVAILABLE,
    **kwargs,
) -> Response:
    """Wrapper around requests to retry calls on an endpoint, with exponential backoff.

    Endpoint call is retried on exceptions (ex: connection timeout, proxy error,...)
    and/or on specific status codes (ex: service unavailable). If the call failed more
    than `max_retries`, the exception is thrown or `raise_for_status` is called on the
    response object.

    Re-implement mechanisms from the `backoff` library to avoid adding an external
    dependencies to `hugging_face_hub`. See https://github.com/litl/backoff.

    Args:
        method (`Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]`):
            HTTP method to perform.
        url (`str`):
            The URL of the resource to fetch.
        max_retries (`int`, *optional*, defaults to `5`):
            Maximum number of retries (set to 0 to disable retrying).
        base_wait_time (`float`, *optional*, defaults to `1`):
            Duration (in seconds) to wait before retrying the first time.
            Wait time between retries then grows exponentially, capped by
            `max_wait_time`.
        max_wait_time (`float`, *optional*, defaults to `8`):
            Maximum duration (in seconds) to wait before retrying.
        retry_on_exceptions (`Type[Exception]` or `Tuple[Type[Exception]]`, *optional*):
            Define which exceptions must be caught to retry the request. Can be a single type or a tuple of types.
            By default, retry on `requests.Timeout` and `requests.ConnectionError`.
        retry_on_status_codes (`int` or `Tuple[int]`, *optional*, defaults to `503`):
            Define on which status codes the request must be retried. By default, only
            HTTP 503 Service Unavailable is retried.
        **kwargs (`dict`, *optional*):
            kwargs to pass to `requests.request`.

    Example:
    ```
    >>> from huggingface_hub.utils import http_backoff

    # Same usage as "requests.request".
    >>> response = http_backoff("GET", "https://www.google.com")
    >>> response.raise_for_status()

    # If you expect a Gateway Timeout from time to time
    >>> http_backoff("PUT", upload_url, data=data, retry_on_status_codes=504)
    >>> response.raise_for_status()
    ```

    <Tip warning={true}>

    When using `requests` it is possible to stream data by passing an iterator to the
    `data` argument. On http backoff this is a problem as the iterator is not reset
    after a failed call. This issue is mitigated for file objects or any IO streams
    by saving the initial position of the cursor (with `data.tell()`) and resetting the
    cursor between each call (with `data.seek()`). For arbitrary iterators, http backoff
    will fail. If this is a hard constraint for you, please let us know by opening an
    issue on [Github](https://github.com/huggingface/huggingface_hub).

    </Tip>
    """
    # Normalize single values into tuples so membership tests below work uniformly.
    if isinstance(retry_on_exceptions, type):  # Tuple from single exception type
        retry_on_exceptions = (retry_on_exceptions,)

    if isinstance(retry_on_status_codes, int):  # Tuple from single status code
        retry_on_status_codes = (retry_on_status_codes,)

    nb_tries = 0
    sleep_time = base_wait_time

    # If `data` is used and is a file object (or any IO), it will be consumed on the
    # first HTTP request. We need to save the initial position so that the full content
    # of the file is re-sent on http backoff. See warning tip in docstring.
    io_obj_initial_pos = None
    if "data" in kwargs and isinstance(kwargs["data"], (io.IOBase, SliceFileObj)):
        io_obj_initial_pos = kwargs["data"].tell()

    session = get_session()
    while True:
        nb_tries += 1
        try:
            # If `data` is used and is a file object (or any IO), set back cursor to
            # initial position.
            if io_obj_initial_pos is not None:
                kwargs["data"].seek(io_obj_initial_pos)

            # Perform request and return if status_code is not in the retry list.
            response = session.request(method=method, url=url, **kwargs)
            if response.status_code not in retry_on_status_codes:
                return response

            # Wrong status code returned (HTTP 503 for instance)
            logger.warning(f"HTTP Error {response.status_code} thrown while requesting {method} {url}")
            if nb_tries > max_retries:
                response.raise_for_status()  # Will raise uncaught exception
                # We return response to avoid infinite loop in the corner case where the
                # user ask for retry on a status code that doesn't raise_for_status.
                return response

        except retry_on_exceptions as err:
            logger.warning(f"'{err}' thrown while requesting {method} {url}")

            if isinstance(err, requests.ConnectionError):
                reset_sessions()  # In case of SSLError it's best to reset the shared requests.Session objects

            if nb_tries > max_retries:
                raise err

        # Falls through here both after a retryable status code and after a caught
        # retryable exception: wait, then loop for the next attempt.
        # Sleep for X seconds
        logger.warning(f"Retrying in {sleep_time}s [Retry {nb_tries}/{max_retries}].")
        time.sleep(sleep_time)

        # Update sleep time for next retry
        sleep_time = min(max_wait_time, sleep_time * 2)  # Exponential backoff
|
337 |
+
|
338 |
+
|
339 |
+
def fix_hf_endpoint_in_url(url: str, endpoint: Optional[str]) -> str:
    """Rewrite `url` so that it targets a custom Hub endpoint instead of the default one.

    This is useful when using a proxy and the Hugging Face Hub returns a URL with the default endpoint.
    If `endpoint` is falsy, the configured `constants.ENDPOINT` is used instead.
    """
    if endpoint:
        endpoint = endpoint.rstrip("/")
    else:
        endpoint = constants.ENDPOINT
    default_endpoints = (constants._HF_DEFAULT_ENDPOINT, constants._HF_DEFAULT_STAGING_ENDPOINT)
    # Only rewrite when a non-default endpoint (e.g. a proxy) has been configured.
    if endpoint not in default_endpoints:
        for default_endpoint in default_endpoints:
            url = url.replace(default_endpoint, endpoint)
    return url
|
350 |
+
|
351 |
+
|
352 |
+
def hf_raise_for_status(response: Response, endpoint_name: Optional[str] = None) -> None:
    """
    Internal version of `response.raise_for_status()` that will refine a
    potential HTTPError. Raised exception will be an instance of `HfHubHTTPError`.

    This helper is meant to be the unique method to raise_for_status when making a call
    to the Hugging Face Hub.


    Example:
    ```py
        import requests
        from huggingface_hub.utils import get_session, hf_raise_for_status, HfHubHTTPError

        response = get_session().post(...)
        try:
            hf_raise_for_status(response)
        except HfHubHTTPError as e:
            print(str(e)) # formatted message
            e.request_id, e.server_message # details returned by server

            # Complete the error message with additional information once it's raised
            e.append_to_message("\n`create_commit` expects the repository to exist.")
            raise
    ```

    Args:
        response (`Response`):
            Response from the server.
        endpoint_name (`str`, *optional*):
            Name of the endpoint that has been called. If provided, the error message
            will be more complete.

    <Tip warning={true}>

    Raises when the request has failed:

        - [`~utils.RepositoryNotFoundError`]
            If the repository to download from cannot be found. This may be because it
            doesn't exist, because `repo_type` is not set correctly, or because the repo
            is `private` and you do not have access.
        - [`~utils.GatedRepoError`]
            If the repository exists but is gated and the user is not on the authorized
            list.
        - [`~utils.RevisionNotFoundError`]
            If the repository exists but the revision couldn't be found.
        - [`~utils.EntryNotFoundError`]
            If the repository exists but the entry (e.g. the requested file) couldn't be
            found.
        - [`~utils.BadRequestError`]
            If request failed with a HTTP 400 BadRequest error.
        - [`~utils.HfHubHTTPError`]
            If request failed for a reason not listed above.

    </Tip>
    """
    try:
        response.raise_for_status()
    except HTTPError as e:
        # The Hub describes the failure in dedicated headers; branch order below
        # matters: specific error codes first, then status-code-based fallbacks.
        error_code = response.headers.get("X-Error-Code")
        error_message = response.headers.get("X-Error-Message")

        if error_code == "RevisionNotFound":
            message = f"{response.status_code} Client Error." + "\n\n" + f"Revision Not Found for url: {response.url}."
            raise _format(RevisionNotFoundError, message, response) from e

        elif error_code == "EntryNotFound":
            message = f"{response.status_code} Client Error." + "\n\n" + f"Entry Not Found for url: {response.url}."
            raise _format(EntryNotFoundError, message, response) from e

        elif error_code == "GatedRepo":
            message = (
                f"{response.status_code} Client Error." + "\n\n" + f"Cannot access gated repo for url {response.url}."
            )
            raise _format(GatedRepoError, message, response) from e

        elif error_message == "Access to this resource is disabled.":
            message = (
                f"{response.status_code} Client Error."
                + "\n\n"
                + f"Cannot access repository for url {response.url}."
                + "\n"
                + "Access to this resource is disabled."
            )
            raise _format(DisabledRepoError, message, response) from e

        elif error_code == "RepoNotFound" or (
            response.status_code == 401
            and error_message != "Invalid credentials in Authorization header"
            and response.request is not None
            and response.request.url is not None
            and REPO_API_REGEX.search(response.request.url) is not None
        ):
            # 401 is misleading as it is returned for:
            #    - private and gated repos if user is not authenticated
            #    - missing repos
            # => for now, we process them as `RepoNotFound` anyway.
            # See https://gist.github.com/Wauplin/46c27ad266b15998ce56a6603796f0b9
            message = (
                f"{response.status_code} Client Error."
                + "\n\n"
                + f"Repository Not Found for url: {response.url}."
                + "\nPlease make sure you specified the correct `repo_id` and"
                " `repo_type`.\nIf you are trying to access a private or gated repo,"
                " make sure you are authenticated. For more details, see"
                " https://huggingface.co/docs/huggingface_hub/authentication"
            )
            raise _format(RepositoryNotFoundError, message, response) from e

        elif response.status_code == 400:
            message = (
                f"\n\nBad request for {endpoint_name} endpoint:" if endpoint_name is not None else "\n\nBad request:"
            )
            raise _format(BadRequestError, message, response) from e

        elif response.status_code == 403:
            message = (
                f"\n\n{response.status_code} Forbidden: {error_message}."
                + f"\nCannot access content at: {response.url}."
                + "\nMake sure your token has the correct permissions."
            )
            raise _format(HfHubHTTPError, message, response) from e

        elif response.status_code == 416:
            # Range not satisfiable: surface both what was requested and what the
            # server reports as available, to ease debugging of resume logic.
            range_header = response.request.headers.get("Range")
            message = f"{e}. Requested range: {range_header}. Content-Range: {response.headers.get('Content-Range')}."
            raise _format(HfHubHTTPError, message, response) from e

        # Convert `HTTPError` into a `HfHubHTTPError` to display request information
        # as well (request id and/or server error message)
        raise _format(HfHubHTTPError, str(e), response) from e
|
483 |
+
|
484 |
+
|
485 |
+
def _format(error_type: Type[HfHubHTTPError], custom_message: str, response: Response) -> HfHubHTTPError:
    """Build an `error_type` exception enriched with server-side details.

    Collects error messages from the response headers and body, deduplicates them,
    appends them (and the request id) to `custom_message`, and returns the exception
    instance (the caller is responsible for raising it).
    """
    server_errors = []

    # Retrieve server error from header
    from_headers = response.headers.get("X-Error-Message")
    if from_headers is not None:
        server_errors.append(from_headers)

    # Retrieve server error from body
    try:
        # Case errors are returned in a JSON format
        data = response.json()

        error = data.get("error")
        if error is not None:
            if isinstance(error, list):
                # Case {'error': ['my error 1', 'my error 2']}
                server_errors.extend(error)
            else:
                # Case {'error': 'my error'}
                server_errors.append(error)

        errors = data.get("errors")
        if errors is not None:
            # Case {'errors': [{'message': 'my error 1'}, {'message': 'my error 2'}]}
            for error in errors:
                if "message" in error:
                    server_errors.append(error["message"])

    except JSONDecodeError:
        # If content is not JSON and not HTML, append the text
        content_type = response.headers.get("Content-Type", "")
        if response.text and "html" not in content_type.lower():
            server_errors.append(response.text)

    # Strip all server messages
    server_errors = [str(line).strip() for line in server_errors if str(line).strip()]

    # Deduplicate server messages (keep order)
    # taken from https://stackoverflow.com/a/17016257
    server_errors = list(dict.fromkeys(server_errors))

    # Format server error
    server_message = "\n".join(server_errors)

    # Add server error to custom message, unless it is already contained in it.
    final_error_message = custom_message
    if server_message and server_message.lower() not in custom_message.lower():
        if "\n\n" in custom_message:
            final_error_message += "\n" + server_message
        else:
            final_error_message += "\n\n" + server_message
    # Add Request ID
    request_id = str(response.headers.get(X_REQUEST_ID, ""))
    if request_id:
        request_id_message = f" (Request ID: {request_id})"
    else:
        # Fallback to X-Amzn-Trace-Id
        request_id = str(response.headers.get(X_AMZN_TRACE_ID, ""))
        if request_id:
            request_id_message = f" (Amzn Trace ID: {request_id})"
    # Insert the request id on the first line so it survives message truncation.
    if request_id and request_id.lower() not in final_error_message.lower():
        if "\n" in final_error_message:
            newline_index = final_error_message.index("\n")
            final_error_message = (
                final_error_message[:newline_index] + request_id_message + final_error_message[newline_index:]
            )
        else:
            final_error_message += request_id_message

    # Return
    return error_type(final_error_message.strip(), response=response, server_message=server_message or None)
|
557 |
+
|
558 |
+
|
559 |
+
def _curlify(request: requests.PreparedRequest) -> str:
    """Render a `requests.PreparedRequest` as an equivalent curl command line (str).

    Used for debug purposes only. The Authorization header value is always redacted
    and large bodies are truncated.

    Implementation vendored from https://github.com/ofw/curlify/blob/master/curlify.py.
    MIT License Copyright (c) 2016 Egor.
    """
    parts: List[Tuple[Any, Any]] = [("curl", None), ("-X", request.method)]

    # Headers are sorted for a stable output; never leak the auth token.
    for name, value in sorted(request.headers.items()):
        if name.lower() == "authorization":
            value = "<TOKEN>"  # Hide authorization header, no matter its value (can be Bearer, Key, etc.)
        parts.append(("-H", "{0}: {1}".format(name, value)))

    if request.body:
        body = request.body
        if isinstance(body, bytes):
            body = body.decode("utf-8", errors="ignore")
        elif hasattr(body, "read"):
            body = "<file-like object>"  # Don't try to read it to avoid consuming the stream
        if len(body) > 1000:
            body = body[:1000] + " ... [truncated]"
        parts.append(("-d", body.replace("\n", "")))

    parts.append((None, request.url))

    tokens = []
    for flag, value in parts:
        if flag:
            tokens.append(quote(flag))
        if value:
            tokens.append(quote(value))

    return " ".join(tokens)
|
597 |
+
|
598 |
+
|
599 |
+
# Regex to parse HTTP Range header
|
600 |
+
RANGE_REGEX = re.compile(r"^\s*bytes\s*=\s*(\d*)\s*-\s*(\d*)\s*$", re.IGNORECASE)
|
601 |
+
|
602 |
+
|
603 |
+
def _adjust_range_header(original_range: Optional[str], resume_size: int) -> Optional[str]:
|
604 |
+
"""
|
605 |
+
Adjust HTTP Range header to account for resume position.
|
606 |
+
"""
|
607 |
+
if not original_range:
|
608 |
+
return f"bytes={resume_size}-"
|
609 |
+
|
610 |
+
if "," in original_range:
|
611 |
+
raise ValueError(f"Multiple ranges detected - {original_range!r}, not supported yet.")
|
612 |
+
|
613 |
+
match = RANGE_REGEX.match(original_range)
|
614 |
+
if not match:
|
615 |
+
raise RuntimeError(f"Invalid range format - {original_range!r}.")
|
616 |
+
start, end = match.groups()
|
617 |
+
|
618 |
+
if not start:
|
619 |
+
if not end:
|
620 |
+
raise RuntimeError(f"Invalid range format - {original_range!r}.")
|
621 |
+
|
622 |
+
new_suffix = int(end) - resume_size
|
623 |
+
new_range = f"bytes=-{new_suffix}"
|
624 |
+
if new_suffix <= 0:
|
625 |
+
raise RuntimeError(f"Empty new range - {new_range!r}.")
|
626 |
+
return new_range
|
627 |
+
|
628 |
+
start = int(start)
|
629 |
+
new_start = start + resume_size
|
630 |
+
if end:
|
631 |
+
end = int(end)
|
632 |
+
new_range = f"bytes={new_start}-{end}"
|
633 |
+
if new_start > end:
|
634 |
+
raise RuntimeError(f"Empty new range - {new_range!r}.")
|
635 |
+
return new_range
|
636 |
+
|
637 |
+
return f"bytes={new_start}-"
|
venv/lib/python3.12/site-packages/huggingface_hub/utils/_pagination.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contains utilities to handle pagination on Huggingface Hub."""
|
16 |
+
|
17 |
+
from typing import Dict, Iterable, Optional
|
18 |
+
|
19 |
+
import requests
|
20 |
+
|
21 |
+
from . import get_session, hf_raise_for_status, http_backoff, logging
|
22 |
+
|
23 |
+
|
24 |
+
logger = logging.get_logger(__name__)
|
25 |
+
|
26 |
+
|
27 |
+
def paginate(path: str, params: Dict, headers: Dict) -> Iterable:
    """Fetch a list of models/datasets/spaces and paginate through results.

    This is using the same "Link" header format as GitHub.
    See:
    - https://requests.readthedocs.io/en/latest/api/#requests.Response.links
    - https://docs.github.com/en/rest/guides/traversing-with-pagination#link-header
    """
    response = get_session().get(path, params=params, headers=headers)
    hf_raise_for_status(response)
    yield from response.json()

    # The "next" link already carries the query parameters, so only headers are
    # forwarded on subsequent page requests.
    while (next_page := _get_next_page(response)) is not None:
        logger.debug(f"Pagination detected. Requesting next page: {next_page}")
        response = http_backoff("GET", next_page, max_retries=20, retry_on_status_codes=429, headers=headers)
        hf_raise_for_status(response)
        yield from response.json()
|
49 |
+
|
50 |
+
|
51 |
+
def _get_next_page(response: requests.Response) -> Optional[str]:
    # Return the URL of the next page if the server sent a GitHub-style "Link" header.
    next_link = response.links.get("next")
    if next_link is None:
        return None
    return next_link.get("url")
|
venv/lib/python3.12/site-packages/huggingface_hub/utils/_paths.py
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contains utilities to handle paths in Huggingface Hub."""
|
16 |
+
|
17 |
+
from fnmatch import fnmatch
|
18 |
+
from pathlib import Path
|
19 |
+
from typing import Callable, Generator, Iterable, List, Optional, TypeVar, Union
|
20 |
+
|
21 |
+
|
22 |
+
# Generic item type accepted by `filter_repo_objects` (paths or arbitrary objects).
T = TypeVar("T")

# Always ignore `.git` and `.cache/huggingface` folders in commits.
# Patterns cover the folder itself, its direct children, and nested occurrences.
DEFAULT_IGNORE_PATTERNS = [
    ".git",
    ".git/*",
    "*/.git",
    "**/.git/**",
    ".cache/huggingface",
    ".cache/huggingface/*",
    "*/.cache/huggingface",
    "**/.cache/huggingface/**",
]
# Forbidden to commit these folders
FORBIDDEN_FOLDERS = [".git", ".cache"]
|
37 |
+
|
38 |
+
|
39 |
+
def filter_repo_objects(
    items: Iterable[T],
    *,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
    key: Optional[Callable[[T], str]] = None,
) -> Generator[T, None, None]:
    """Filter repo objects based on an allowlist and a denylist.

    Input must be a list of paths (`str` or `Path`) or a list of arbitrary objects.
    In the latter case, `key` must be provided and specifies a function of one argument
    that is used to extract a path from each element in iterable.

    Patterns are Unix shell-style wildcards which are NOT regular expressions. See
    https://docs.python.org/3/library/fnmatch.html for more details.

    Args:
        items (`Iterable`):
            List of items to filter.
        allow_patterns (`str` or `List[str]`, *optional*):
            Patterns constituting the allowlist. If provided, item paths must match at
            least one pattern from the allowlist.
        ignore_patterns (`str` or `List[str]`, *optional*):
            Patterns constituting the denylist. If provided, item paths must not match
            any patterns from the denylist.
        key (`Callable[[T], str]`, *optional*):
            Single-argument function to extract a path from each item. If not provided,
            the `items` must already be `str` or `Path`.

    Returns:
        Filtered list of objects, as a generator.

    Raises:
        :class:`ValueError`:
            If `key` is not provided and items are not `str` or `Path`.

    Example usage with paths:
    ```python
    >>> # Filter only PDFs that are not hidden.
    >>> list(filter_repo_objects(
    ...     ["aaa.PDF", "bbb.jpg", ".ccc.pdf", ".ddd.png"],
    ...     allow_patterns=["*.pdf"],
    ...     ignore_patterns=[".*"],
    ... ))
    ["aaa.pdf"]
    ```

    Example usage with objects:
    ```python
    >>> list(filter_repo_objects(
    ...     [
    ...         CommitOperationAdd(path_or_fileobj="/tmp/aaa.pdf", path_in_repo="aaa.pdf")
    ...         CommitOperationAdd(path_or_fileobj="/tmp/bbb.jpg", path_in_repo="bbb.jpg")
    ...         CommitOperationAdd(path_or_fileobj="/tmp/.ccc.pdf", path_in_repo=".ccc.pdf")
    ...         CommitOperationAdd(path_or_fileobj="/tmp/.ddd.png", path_in_repo=".ddd.png")
    ...     ],
    ...     allow_patterns=["*.pdf"],
    ...     ignore_patterns=[".*"],
    ...     key=lambda x: x.repo_in_path
    ... ))
    [CommitOperationAdd(path_or_fileobj="/tmp/aaa.pdf", path_in_repo="aaa.pdf")]
    ```
    """
    # Normalize single patterns into lists, then expand trailing-slash directory
    # patterns into globs over the directory's contents.
    if isinstance(allow_patterns, str):
        allow_patterns = [allow_patterns]
    if isinstance(ignore_patterns, str):
        ignore_patterns = [ignore_patterns]
    if allow_patterns is not None:
        allow_patterns = [_add_wildcard_to_directories(pattern) for pattern in allow_patterns]
    if ignore_patterns is not None:
        ignore_patterns = [_add_wildcard_to_directories(pattern) for pattern in ignore_patterns]

    if key is None:
        # Items must already be `str` or `Path`; anything else is an error.
        def _identity(item: T) -> str:
            if isinstance(item, str):
                return item
            if isinstance(item, Path):
                return str(item)
            raise ValueError(f"Please provide `key` argument in `filter_repo_objects`: `{item}` is not a string.")

        key = _identity

    for item in items:
        path = key(item)
        allowed = allow_patterns is None or any(fnmatch(path, pattern) for pattern in allow_patterns)
        ignored = ignore_patterns is not None and any(fnmatch(path, pattern) for pattern in ignore_patterns)
        if allowed and not ignored:
            yield item
|
136 |
+
|
137 |
+
|
138 |
+
def _add_wildcard_to_directories(pattern: str) -> str:
|
139 |
+
if pattern[-1] == "/":
|
140 |
+
return pattern + "*"
|
141 |
+
return pattern
|
venv/lib/python3.12/site-packages/huggingface_hub/utils/_safetensors.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import functools
|
2 |
+
import operator
|
3 |
+
from collections import defaultdict
|
4 |
+
from dataclasses import dataclass, field
|
5 |
+
from typing import Dict, List, Literal, Optional, Tuple
|
6 |
+
|
7 |
+
|
8 |
+
# Type aliases used throughout the safetensors helpers.
FILENAME_T = str
TENSOR_NAME_T = str
DTYPE_T = Literal["F64", "F32", "F16", "BF16", "I64", "I32", "I16", "I8", "U8", "BOOL"]


@dataclass
class TensorInfo:
    """Information about a tensor.

    For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.

    Attributes:
        dtype (`str`):
            The data type of the tensor ("F64", "F32", "F16", "BF16", "I64", "I32", "I16", "I8", "U8", "BOOL").
        shape (`List[int]`):
            The shape of the tensor.
        data_offsets (`Tuple[int, int]`):
            The offsets of the data in the file as a tuple `[BEGIN, END]`.
        parameter_count (`int`):
            The number of parameters in the tensor (derived, not passed to `__init__`).
    """

    dtype: DTYPE_T
    shape: List[int]
    data_offsets: Tuple[int, int]
    parameter_count: int = field(init=False)

    def __post_init__(self) -> None:
        # Parameter count is the product of all dimensions. A scalar tensor has an
        # empty shape, in which case `reduce` raises TypeError and the count is 1.
        try:
            count = functools.reduce(operator.mul, self.shape)
        except TypeError:
            count = 1
        self.parameter_count = count
|
41 |
+
|
42 |
+
|
43 |
+
@dataclass
class SafetensorsFileMetadata:
    """Metadata for a Safetensors file hosted on the Hub.

    This class is returned by [`parse_safetensors_file_metadata`].

    For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.

    Attributes:
        metadata (`Dict`):
            The metadata contained in the file.
        tensors (`Dict[str, TensorInfo]`):
            A map of all tensors. Keys are tensor names and values are information about the corresponding tensor, as a
            [`TensorInfo`] object.
        parameter_count (`Dict[str, int]`):
            A map of the number of parameters per data type (derived, not passed to `__init__`).
    """

    metadata: Dict[str, str]
    tensors: Dict[TENSOR_NAME_T, TensorInfo]
    parameter_count: Dict[DTYPE_T, int] = field(init=False)

    def __post_init__(self) -> None:
        # Aggregate parameter counts per dtype over every tensor in the file.
        totals: Dict[DTYPE_T, int] = {}
        for tensor_info in self.tensors.values():
            totals[tensor_info.dtype] = totals.get(tensor_info.dtype, 0) + tensor_info.parameter_count
        self.parameter_count = totals
|
71 |
+
|
72 |
+
|
73 |
+
@dataclass
class SafetensorsRepoMetadata:
    """Metadata for a Safetensors repo.

    A repo is considered to be a Safetensors repo if it contains either a 'model.safetensors' weight file (non-shared
    model) or a 'model.safetensors.index.json' index file (sharded model) at its root.

    This class is returned by [`get_safetensors_metadata`].

    For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.

    Attributes:
        metadata (`Dict`, *optional*):
            The metadata contained in the 'model.safetensors.index.json' file, if it exists. Only populated for sharded
            models.
        sharded (`bool`):
            Whether the repo contains a sharded model or not.
        weight_map (`Dict[str, str]`):
            A map of all weights. Keys are tensor names and values are filenames of the files containing the tensors.
        files_metadata (`Dict[str, SafetensorsFileMetadata]`):
            A map of all files metadata. Keys are filenames and values are the metadata of the corresponding file, as
            a [`SafetensorsFileMetadata`] object.
        parameter_count (`Dict[str, int]`):
            A map of the number of parameters per data type. Keys are data types and values are the number of parameters
            of that data type.
    """

    metadata: Optional[Dict]
    sharded: bool
    weight_map: Dict[TENSOR_NAME_T, FILENAME_T]  # tensor name -> filename
    files_metadata: Dict[FILENAME_T, SafetensorsFileMetadata]  # filename -> metadata
    parameter_count: Dict[DTYPE_T, int] = field(init=False)

    def __post_init__(self) -> None:
        # Sum the per-file dtype counts over every file of the repo.
        totals: Dict[DTYPE_T, int] = defaultdict(int)
        for file_metadata in self.files_metadata.values():
            for dtype, count in file_metadata.parameter_count.items():
                totals[dtype] += count
        self.parameter_count = dict(totals)
|
venv/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py
ADDED
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2022-present, the HuggingFace Inc. team.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Contains utilities to validate argument values in `huggingface_hub`."""
|
16 |
+
|
17 |
+
import inspect
|
18 |
+
import re
|
19 |
+
import warnings
|
20 |
+
from functools import wraps
|
21 |
+
from itertools import chain
|
22 |
+
from typing import Any, Dict
|
23 |
+
|
24 |
+
from huggingface_hub.errors import HFValidationError
|
25 |
+
|
26 |
+
from ._typing import CallableT
|
27 |
+
|
28 |
+
|
29 |
+
# Accepts "repo_name" or "namespace/repo_name". Both parts are made of word
# characters plus "-" and ".", must start and end on a word boundary, and the
# repo name is capped at 96 characters.
REPO_ID_REGEX = re.compile(r"^(\b[\w\-.]+\b/)?\b[\w\-.]{1,96}\b$")
|
40 |
+
|
41 |
+
|
42 |
+
def validate_hf_hub_args(fn: CallableT) -> CallableT:
    """Validate values received as argument for any public method of `huggingface_hub`.

    The goal of this decorator is to harmonize validation of arguments reused
    everywhere. By default, all defined validators are tested.

    Validators:
        - [`~utils.validate_repo_id`]: `repo_id` must be `"repo_name"`
          or `"namespace/repo_name"`. Namespace is a username or an organization.
        - [`~utils.smoothly_deprecate_use_auth_token`]: Use `token` instead of
          `use_auth_token` (only if `use_auth_token` is not expected by the decorated
          function - in practice, always the case in `huggingface_hub`).

    Raises:
        [`~utils.HFValidationError`]:
            If an input is not valid.
    """
    # TODO: add an argument to opt-out validation for specific argument?
    signature = inspect.signature(fn)

    # Should the validator switch `use_auth_token` values to `token`? In practice, always
    # True in `huggingface_hub`. Might not be the case in a downstream library.
    check_use_auth_token = "use_auth_token" not in signature.parameters and "token" in signature.parameters

    @wraps(fn)
    def _inner_fn(*args, **kwargs):
        has_token = False
        # Walk positional args (paired with parameter names) then keyword args.
        positional = zip(signature.parameters, args)
        for name, value in chain(positional, kwargs.items()):
            if name in ("repo_id", "from_id", "to_id"):
                validate_repo_id(value)
            elif name == "token" and value is not None:
                has_token = True

        if check_use_auth_token:
            kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)

        return fn(*args, **kwargs)

    return _inner_fn  # type: ignore
|
117 |
+
|
118 |
+
|
119 |
+
def validate_repo_id(repo_id: str) -> None:
    """Validate `repo_id` is valid.

    This is not meant to replace the proper validation made on the Hub but rather to
    avoid local inconsistencies whenever possible (example: passing `repo_type` in the
    `repo_id` is forbidden).

    Rules:
    - Between 1 and 96 characters.
    - Either "repo_name" or "namespace/repo_name"
    - [a-zA-Z0-9] or "-", "_", "."
    - "--" and ".." are forbidden

    Valid: `"foo"`, `"foo/bar"`, `"123"`, `"Foo-BAR_foo.bar123"`

    Not valid: `"datasets/foo/bar"`, `".repo_id"`, `"foo--bar"`, `"foo.git"`

    Discussed in https://github.com/huggingface/huggingface_hub/issues/1008.

    Raises:
        [`~utils.HFValidationError`]: if any rule above is violated.
    """
    if not isinstance(repo_id, str):
        # A `Path` object is the typical offender here.
        raise HFValidationError(f"Repo id must be a string, not {type(repo_id)}: '{repo_id}'.")

    if repo_id.count("/") >= 2:
        raise HFValidationError(
            "Repo id must be in the form 'repo_name' or 'namespace/repo_name':"
            f" '{repo_id}'. Use `repo_type` argument if needed."
        )

    if REPO_ID_REGEX.match(repo_id) is None:
        raise HFValidationError(
            "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are"
            " forbidden, '-' and '.' cannot start or end the name, max length is 96:"
            f" '{repo_id}'."
        )

    if any(forbidden in repo_id for forbidden in ("--", "..")):
        raise HFValidationError(f"Cannot have -- or .. in repo_id: '{repo_id}'.")

    if repo_id.endswith(".git"):
        raise HFValidationError(f"Repo_id cannot end by '.git': '{repo_id}'.")
|
171 |
+
|
172 |
+
|
173 |
+
def smoothly_deprecate_use_auth_token(fn_name: str, has_token: bool, kwargs: Dict[str, Any]) -> Dict[str, Any]:
    """Smoothly deprecate `use_auth_token` in the `huggingface_hub` codebase.

    The long-term goal is to remove any mention of `use_auth_token` in favor of a
    unique and less verbose `token` argument. If only `use_auth_token` is passed,
    its value is silently forwarded as `token`. If both are passed, a warning is
    emitted and the `use_auth_token` value is dropped.

    Discussed in:
    - https://github.com/huggingface/huggingface_hub/issues/1094.
    - https://github.com/huggingface/huggingface_hub/pull/928
    - (related) https://github.com/huggingface/huggingface_hub/pull/1064

    Args:
        fn_name (`str`): name of the decorated function (used in the warning message).
        has_token (`bool`): whether a non-None `token` was already passed.
        kwargs (`Dict[str, Any]`): keyword arguments received by the decorated function.

    Returns:
        `Dict[str, Any]`: a new kwargs dict without `use_auth_token` (input is never mutated).
    """
    sanitized = kwargs.copy()  # do not mutate input !

    use_auth_token = sanitized.pop("use_auth_token", None)  # remove from kwargs
    if use_auth_token is None:
        return sanitized

    if has_token:
        warnings.warn(
            "Both `token` and `use_auth_token` are passed to"
            f" `{fn_name}` with non-None values. `token` is now the"
            " preferred argument to pass a User Access Token."
            " `use_auth_token` value will be ignored."
        )
    else:
        # `token` argument is not passed and a non-None value is passed in
        # `use_auth_token` => use `use_auth_token` value as `token` kwarg.
        sanitized["token"] = use_auth_token

    return sanitized
|
venv/lib/python3.12/site-packages/huggingface_hub/utils/_xet.py
ADDED
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass
|
2 |
+
from enum import Enum
|
3 |
+
from typing import Dict, Optional
|
4 |
+
|
5 |
+
import requests
|
6 |
+
|
7 |
+
from .. import constants
|
8 |
+
from . import get_session, hf_raise_for_status, validate_hf_hub_args
|
9 |
+
|
10 |
+
|
11 |
+
class XetTokenType(str, Enum):
    """Scope of a xet access token requested from the Hub: read-only or read/write."""

    READ = "read"
    WRITE = "write"
|
14 |
+
|
15 |
+
|
16 |
+
@dataclass(frozen=True)
class XetFileData:
    """Immutable per-file xet metadata parsed from a Hub response."""

    # Hash identifying the file content in the xet storage backend.
    file_hash: str
    # Hub route used to refresh the xet connection info for this file.
    refresh_route: str
|
20 |
+
|
21 |
+
|
22 |
+
@dataclass(frozen=True)
class XetConnectionInfo:
    """Immutable credentials and endpoint needed to talk to the xet storage service."""

    # Short-lived token granting access to the xet service.
    access_token: str
    # Token expiry as a Unix epoch timestamp (seconds).
    expiration_unix_epoch: int
    # Base URL of the xet storage service.
    endpoint: str
|
27 |
+
|
28 |
+
|
29 |
+
def parse_xet_file_data_from_response(response: requests.Response) -> Optional[XetFileData]:
    """
    Parse XET file metadata from an HTTP response.

    Extracts the file hash and refresh route from the response headers, preferring the
    auth link (`response.links`) over the plain refresh-route header when both exist.

    Args:
        response (`requests.Response`):
            The HTTP response object containing headers dict and links dict to extract the XET metadata from.
    Returns:
        `Optional[XetFileData]`:
            An instance of `XetFileData` containing the file hash and refresh route if the metadata
            is found. Returns `None` if the required metadata is missing.
    """
    if response is None:
        return None
    try:
        file_hash = response.headers[constants.HUGGINGFACE_HEADER_X_XET_HASH]

        # Prefer the dedicated auth link when the server provides it.
        link = response.links.get(constants.HUGGINGFACE_HEADER_LINK_XET_AUTH_KEY)
        if link is not None:
            refresh_route = link["url"]
        else:
            refresh_route = response.headers[constants.HUGGINGFACE_HEADER_X_XET_REFRESH_ROUTE]
    except KeyError:
        return None

    return XetFileData(file_hash=file_hash, refresh_route=refresh_route)
|
60 |
+
|
61 |
+
|
62 |
+
def parse_xet_connection_info_from_headers(headers: Dict[str, str]) -> Optional[XetConnectionInfo]:
    """
    Parse XET connection info from the HTTP headers or return None if not found.

    Args:
        headers (`Dict`):
            HTTP headers to extract the XET metadata from.
    Returns:
        `XetConnectionInfo` or `None`:
            The information needed to connect to the XET storage service.
            Returns `None` if the headers do not contain the XET connection info
            (missing header, or a non-integer expiration value).
    """
    try:
        return XetConnectionInfo(
            endpoint=headers[constants.HUGGINGFACE_HEADER_X_XET_ENDPOINT],
            access_token=headers[constants.HUGGINGFACE_HEADER_X_XET_ACCESS_TOKEN],
            expiration_unix_epoch=int(headers[constants.HUGGINGFACE_HEADER_X_XET_EXPIRATION]),
        )
    except (KeyError, ValueError, TypeError):
        return None
|
85 |
+
|
86 |
+
|
87 |
+
@validate_hf_hub_args
def refresh_xet_connection_info(
    *,
    file_data: XetFileData,
    headers: Dict[str, str],
) -> XetConnectionInfo:
    """
    Request fresh xet connection information from the Hub using the file's refresh route.
    This includes the access token, expiration, and XET service URL.

    Args:
        file_data: (`XetFileData`):
            The file data needed to refresh the xet connection information.
        headers (`Dict[str, str]`):
            Headers to use for the request, including authorization headers and user agent.
    Returns:
        `XetConnectionInfo`:
            The connection information needed to make the request to the xet storage service.
    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the Hub API response is improperly formatted, or the metadata has no refresh route.
    """
    if file_data.refresh_route is None:
        raise ValueError("The provided xet metadata does not contain a refresh endpoint.")
    return _fetch_xet_connection_info_with_url(file_data.refresh_route, headers)
|
113 |
+
|
114 |
+
|
115 |
+
@validate_hf_hub_args
def fetch_xet_connection_info_from_repo_info(
    *,
    token_type: XetTokenType,
    repo_id: str,
    repo_type: str,
    revision: Optional[str] = None,
    headers: Dict[str, str],
    endpoint: Optional[str] = None,
    params: Optional[Dict[str, str]] = None,
) -> XetConnectionInfo:
    """
    Uses the repo info to request a xet access token from Hub.

    Args:
        token_type (`XetTokenType`):
            Type of the token to request: `"read"` or `"write"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        revision (`str`, `optional`):
            The revision of the repo to get the token for.
        headers (`Dict[str, str]`):
            Headers to use for the request, including authorization headers and user agent.
        endpoint (`str`, `optional`):
            The endpoint to use for the request. Defaults to the Hub endpoint.
        params (`Dict[str, str]`, `optional`):
            Additional parameters to pass with the request.
    Returns:
        `XetConnectionInfo`:
            The connection information needed to make the request to the xet storage service.
    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the Hub API response is improperly formatted.
    """
    base = constants.ENDPOINT if endpoint is None else endpoint
    token_url = f"{base}/api/{repo_type}s/{repo_id}/xet-{token_type.value}-token/{revision}"
    return _fetch_xet_connection_info_with_url(token_url, headers, params)
|
155 |
+
|
156 |
+
|
157 |
+
@validate_hf_hub_args
def _fetch_xet_connection_info_with_url(
    url: str,
    headers: Dict[str, str],
    params: Optional[Dict[str, str]] = None,
) -> XetConnectionInfo:
    """
    Requests the xet connection info from the supplied URL. This includes the
    access token, expiration time, and endpoint to use for the xet storage service.

    Args:
        url: (`str`):
            The access token endpoint URL.
        headers (`Dict[str, str]`):
            Headers to use for the request, including authorization headers and user agent.
        params (`Dict[str, str]`, `optional`):
            Additional parameters to pass with the request.
    Returns:
        `XetConnectionInfo`:
            The connection information needed to make the request to the xet storage service.
    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the Hub API response is improperly formatted.
    """
    response = get_session().get(url=url, headers=headers, params=params)
    hf_raise_for_status(response)

    connection_info = parse_xet_connection_info_from_headers(response.headers)  # type: ignore
    if connection_info is None:
        raise ValueError("Xet headers have not been correctly set by the server.")
    return connection_info
|
venv/lib/python3.12/site-packages/huggingface_hub/utils/insecure_hashlib.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Taken from https://github.com/mlflow/mlflow/pull/10119
|
2 |
+
#
|
3 |
+
# DO NOT use this function for security purposes (e.g., password hashing).
|
4 |
+
#
|
5 |
+
# In Python >= 3.9, insecure hashing algorithms such as MD5 fail in FIPS-compliant
|
6 |
+
# environments unless `usedforsecurity=False` is explicitly passed.
|
7 |
+
#
|
8 |
+
# References:
|
9 |
+
# - https://github.com/mlflow/mlflow/issues/9905
|
10 |
+
# - https://github.com/mlflow/mlflow/pull/10119
|
11 |
+
# - https://docs.python.org/3/library/hashlib.html
|
12 |
+
# - https://github.com/huggingface/transformers/pull/27038
|
13 |
+
#
|
14 |
+
# Usage:
|
15 |
+
# ```python
|
16 |
+
# # Use
|
17 |
+
# from huggingface_hub.utils.insecure_hashlib import sha256
|
18 |
+
# # instead of
|
19 |
+
# from hashlib import sha256
|
20 |
+
#
|
21 |
+
# # Use
|
22 |
+
# from huggingface_hub.utils import insecure_hashlib
|
23 |
+
# # instead of
|
24 |
+
# import hashlib
|
25 |
+
# ```
|
26 |
+
import functools
import hashlib
import sys


if sys.version_info < (3, 9):
    # `usedforsecurity` does not exist before Python 3.9: fall back to the
    # plain constructors.
    md5 = hashlib.md5
    sha1 = hashlib.sha1
    sha256 = hashlib.sha256
else:
    # Explicitly flag these digests as non-security usages so they keep
    # working on FIPS-compliant builds.
    md5 = functools.partial(hashlib.md5, usedforsecurity=False)
    sha1 = functools.partial(hashlib.sha1, usedforsecurity=False)
    sha256 = functools.partial(hashlib.sha256, usedforsecurity=False)
|
venv/lib/python3.12/site-packages/huggingface_hub/utils/logging.py
ADDED
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# coding=utf-8
|
2 |
+
# Copyright 2020 Optuna, Hugging Face
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
"""Logging utilities."""
|
16 |
+
|
17 |
+
import logging
|
18 |
+
import os
|
19 |
+
from logging import (
|
20 |
+
CRITICAL, # NOQA
|
21 |
+
DEBUG, # NOQA
|
22 |
+
ERROR, # NOQA
|
23 |
+
FATAL, # NOQA
|
24 |
+
INFO, # NOQA
|
25 |
+
NOTSET, # NOQA
|
26 |
+
WARN, # NOQA
|
27 |
+
WARNING, # NOQA
|
28 |
+
)
|
29 |
+
from typing import Optional
|
30 |
+
|
31 |
+
from .. import constants
|
32 |
+
|
33 |
+
|
34 |
+
# Level names accepted in `HF_HUB_VERBOSITY`, mapped to stdlib logging levels.
log_levels = {
    name: getattr(logging, name.upper())
    for name in ("debug", "info", "warning", "error", "critical")
}

# Level used when `HF_HUB_VERBOSITY` is unset or invalid.
_default_log_level = logging.WARNING
|
43 |
+
|
44 |
+
|
45 |
+
def _get_library_name() -> str:
    # Top-level package name: the part of `__name__` before the first dot.
    return __name__.partition(".")[0]
|
47 |
+
|
48 |
+
|
49 |
+
def _get_library_root_logger() -> logging.Logger:
    # Single logger shared by the whole library; children inherit its level.
    root_name = _get_library_name()
    return logging.getLogger(root_name)
|
51 |
+
|
52 |
+
|
53 |
+
def _get_default_logging_level():
    """
    If `HF_HUB_VERBOSITY` env var is set to one of the valid choices return that as the new default level. If it is not
    - fall back to `_default_log_level`
    """
    env_level_str = os.getenv("HF_HUB_VERBOSITY", None)
    if not env_level_str:
        return _default_log_level
    try:
        return log_levels[env_level_str]
    except KeyError:
        # Unknown value: warn on the root logger and keep the default.
        logging.getLogger().warning(
            f"Unknown option HF_HUB_VERBOSITY={env_level_str}, has to be one of: {', '.join(log_levels.keys())}"
        )
        return _default_log_level
|
67 |
+
|
68 |
+
|
69 |
+
def _configure_library_root_logger() -> None:
    # Install a stream handler and apply the env-derived default level once,
    # at import time.
    root = _get_library_root_logger()
    root.addHandler(logging.StreamHandler())
    root.setLevel(_get_default_logging_level())
|
73 |
+
|
74 |
+
|
75 |
+
def _reset_library_root_logger() -> None:
    # NOTSET makes the library logger inherit its effective level again.
    root = _get_library_root_logger()
    root.setLevel(logging.NOTSET)
|
78 |
+
|
79 |
+
|
80 |
+
def get_logger(name: Optional[str] = None) -> logging.Logger:
    """
    Returns a logger with the specified name. This function is not supposed
    to be directly accessed by library users.

    Args:
        name (`str`, *optional*):
            The name of the logger to get, usually the filename. Defaults to the
            library's root logger name.

    Example:

    ```python
    >>> from huggingface_hub import get_logger

    >>> logger = get_logger(__file__)
    >>> logger.set_verbosity_info()
    ```
    """
    return logging.getLogger(_get_library_name() if name is None else name)
|
103 |
+
|
104 |
+
|
105 |
+
def get_verbosity() -> int:
    """Return the current level for the HuggingFace Hub's root logger.

    Returns:
        Logging level, e.g., `huggingface_hub.logging.DEBUG` and
        `huggingface_hub.logging.INFO`.

    <Tip>

    HuggingFace Hub has following logging levels:

    - `huggingface_hub.logging.CRITICAL`, `huggingface_hub.logging.FATAL`
    - `huggingface_hub.logging.ERROR`
    - `huggingface_hub.logging.WARNING`, `huggingface_hub.logging.WARN`
    - `huggingface_hub.logging.INFO`
    - `huggingface_hub.logging.DEBUG`

    </Tip>
    """
    return _get_library_root_logger().getEffectiveLevel()
|
125 |
+
|
126 |
+
|
127 |
+
def set_verbosity(verbosity: int) -> None:
    """
    Sets the level for the HuggingFace Hub's root logger.

    Args:
        verbosity (`int`):
            Logging level, e.g., `huggingface_hub.logging.DEBUG` and
            `huggingface_hub.logging.INFO`.
    """
    _get_library_root_logger().setLevel(verbosity)
|
137 |
+
|
138 |
+
|
139 |
+
def set_verbosity_info():
    """Set the library verbosity to `logging.INFO`."""
    return set_verbosity(INFO)


def set_verbosity_warning():
    """Set the library verbosity to `logging.WARNING`."""
    return set_verbosity(WARNING)


def set_verbosity_debug():
    """Set the library verbosity to `logging.DEBUG`."""
    return set_verbosity(DEBUG)


def set_verbosity_error():
    """Set the library verbosity to `logging.ERROR`."""
    return set_verbosity(ERROR)
|
165 |
+
|
166 |
+
|
167 |
+
def disable_propagation() -> None:
    """
    Disable propagation of the library log outputs. Note that log propagation is
    disabled by default.
    """
    _get_library_root_logger().propagate = False


def enable_propagation() -> None:
    """
    Enable propagation of the library log outputs. Please disable the
    HuggingFace Hub's default handler to prevent double logging if the root
    logger has been configured.
    """
    _get_library_root_logger().propagate = True
|
182 |
+
|
183 |
+
|
184 |
+
# Attach the library's stream handler and default level at import time.
_configure_library_root_logger()

if constants.HF_DEBUG:
    # If `HF_DEBUG` environment variable is set, set the verbosity of `huggingface_hub` logger to `DEBUG`.
    set_verbosity_debug()
|
venv/lib/python3.12/site-packages/huggingface_hub/utils/sha.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Utilities to efficiently compute the SHA 256 hash of a bunch of bytes."""
|
2 |
+
|
3 |
+
from typing import BinaryIO, Optional
|
4 |
+
|
5 |
+
from .insecure_hashlib import sha1, sha256
|
6 |
+
|
7 |
+
|
8 |
+
def sha_fileobj(fileobj: BinaryIO, chunk_size: Optional[int] = None) -> bytes:
    """
    Computes the sha256 hash of the given file object, by chunks of size `chunk_size`.

    Args:
        fileobj (file-like object):
            The File object to compute sha256 for, typically obtained with `open(path, "rb")`
        chunk_size (`int`, *optional*):
            The number of bytes to read from `fileobj` at once, defaults to 1MB.

    Returns:
        `bytes`: `fileobj`'s sha256 hash as bytes
    """
    size = 1024 * 1024 if chunk_size is None else chunk_size

    digest = sha256()
    # Stream the file in fixed-size chunks; `read` returns b"" at EOF.
    while chunk := fileobj.read(size):
        digest.update(chunk)
    return digest.digest()
|
30 |
+
|
31 |
+
|
32 |
+
def git_hash(data: bytes) -> str:
|
33 |
+
"""
|
34 |
+
Computes the git-sha1 hash of the given bytes, using the same algorithm as git.
|
35 |
+
|
36 |
+
This is equivalent to running `git hash-object`. See https://git-scm.com/docs/git-hash-object
|
37 |
+
for more details.
|
38 |
+
|
39 |
+
Note: this method is valid for regular files. For LFS files, the proper git hash is supposed to be computed on the
|
40 |
+
pointer file content, not the actual file content. However, for simplicity, we directly compare the sha256 of
|
41 |
+
the LFS file content when we want to compare LFS files.
|
42 |
+
|
43 |
+
Args:
|
44 |
+
data (`bytes`):
|
45 |
+
The data to compute the git-hash for.
|
46 |
+
|
47 |
+
Returns:
|
48 |
+
`str`: the git-hash of `data` as an hexadecimal string.
|
49 |
+
|
50 |
+
Example:
|
51 |
+
```python
|
52 |
+
>>> from huggingface_hub.utils.sha import git_hash
|
53 |
+
>>> git_hash(b"Hello, World!")
|
54 |
+
'b45ef6fec89518d314f546fd6c3025367b721684'
|
55 |
+
```
|
56 |
+
"""
|
57 |
+
# Taken from https://gist.github.com/msabramo/763200
|
58 |
+
# Note: no need to optimize by reading the file in chunks as we're not supposed to hash huge files (5MB maximum).
|
59 |
+
sha = sha1()
|
60 |
+
sha.update(b"blob ")
|
61 |
+
sha.update(str(len(data)).encode())
|
62 |
+
sha.update(b"\0")
|
63 |
+
sha.update(data)
|
64 |
+
return sha.hexdigest()
|
venv/lib/python3.12/site-packages/idna/__pycache__/core.cpython-312.pyc
ADDED
Binary file (16.2 kB). View file
|
|
venv/lib/python3.12/site-packages/idna/__pycache__/idnadata.cpython-312.pyc
ADDED
Binary file (99.5 kB). View file
|
|
venv/lib/python3.12/site-packages/packaging-25.0.dist-info/licenses/LICENSE
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
This software is made available under the terms of *either* of the licenses
|
2 |
+
found in LICENSE.APACHE or LICENSE.BSD. Contributions to this software is made
|
3 |
+
under the terms of *both* these licenses.
|
venv/lib/python3.12/site-packages/packaging-25.0.dist-info/licenses/LICENSE.APACHE
ADDED
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
Apache License
|
3 |
+
Version 2.0, January 2004
|
4 |
+
http://www.apache.org/licenses/
|
5 |
+
|
6 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
7 |
+
|
8 |
+
1. Definitions.
|
9 |
+
|
10 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
11 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
12 |
+
|
13 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
14 |
+
the copyright owner that is granting the License.
|
15 |
+
|
16 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
17 |
+
other entities that control, are controlled by, or are under common
|
18 |
+
control with that entity. For the purposes of this definition,
|
19 |
+
"control" means (i) the power, direct or indirect, to cause the
|
20 |
+
direction or management of such entity, whether by contract or
|
21 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
22 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
23 |
+
|
24 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
25 |
+
exercising permissions granted by this License.
|
26 |
+
|
27 |
+
"Source" form shall mean the preferred form for making modifications,
|
28 |
+
including but not limited to software source code, documentation
|
29 |
+
source, and configuration files.
|
30 |
+
|
31 |
+
"Object" form shall mean any form resulting from mechanical
|
32 |
+
transformation or translation of a Source form, including but
|
33 |
+
not limited to compiled object code, generated documentation,
|
34 |
+
and conversions to other media types.
|
35 |
+
|
36 |
+
"Work" shall mean the work of authorship, whether in Source or
|
37 |
+
Object form, made available under the License, as indicated by a
|
38 |
+
copyright notice that is included in or attached to the work
|
39 |
+
(an example is provided in the Appendix below).
|
40 |
+
|
41 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
42 |
+
form, that is based on (or derived from) the Work and for which the
|
43 |
+
editorial revisions, annotations, elaborations, or other modifications
|
44 |
+
represent, as a whole, an original work of authorship. For the purposes
|
45 |
+
of this License, Derivative Works shall not include works that remain
|
46 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
47 |
+
the Work and Derivative Works thereof.
|
48 |
+
|
49 |
+
"Contribution" shall mean any work of authorship, including
|
50 |
+
the original version of the Work and any modifications or additions
|
51 |
+
to that Work or Derivative Works thereof, that is intentionally
|
52 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
53 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
54 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
55 |
+
means any form of electronic, verbal, or written communication sent
|
56 |
+
to the Licensor or its representatives, including but not limited to
|
57 |
+
communication on electronic mailing lists, source code control systems,
|
58 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
59 |
+
Licensor for the purpose of discussing and improving the Work, but
|
60 |
+
excluding communication that is conspicuously marked or otherwise
|
61 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
62 |
+
|
63 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
64 |
+
on behalf of whom a Contribution has been received by Licensor and
|
65 |
+
subsequently incorporated within the Work.
|
66 |
+
|
67 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
68 |
+
this License, each Contributor hereby grants to You a perpetual,
|
69 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
70 |
+
copyright license to reproduce, prepare Derivative Works of,
|
71 |
+
publicly display, publicly perform, sublicense, and distribute the
|
72 |
+
Work and such Derivative Works in Source or Object form.
|
73 |
+
|
74 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
75 |
+
this License, each Contributor hereby grants to You a perpetual,
|
76 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
77 |
+
(except as stated in this section) patent license to make, have made,
|
78 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
79 |
+
where such license applies only to those patent claims licensable
|
80 |
+
by such Contributor that are necessarily infringed by their
|
81 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
82 |
+
with the Work to which such Contribution(s) was submitted. If You
|
83 |
+
institute patent litigation against any entity (including a
|
84 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
85 |
+
or a Contribution incorporated within the Work constitutes direct
|
86 |
+
or contributory patent infringement, then any patent licenses
|
87 |
+
granted to You under this License for that Work shall terminate
|
88 |
+
as of the date such litigation is filed.
|
89 |
+
|
90 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
91 |
+
Work or Derivative Works thereof in any medium, with or without
|
92 |
+
modifications, and in Source or Object form, provided that You
|
93 |
+
meet the following conditions:
|
94 |
+
|
95 |
+
(a) You must give any other recipients of the Work or
|
96 |
+
Derivative Works a copy of this License; and
|
97 |
+
|
98 |
+
(b) You must cause any modified files to carry prominent notices
|
99 |
+
stating that You changed the files; and
|
100 |
+
|
101 |
+
(c) You must retain, in the Source form of any Derivative Works
|
102 |
+
that You distribute, all copyright, patent, trademark, and
|
103 |
+
attribution notices from the Source form of the Work,
|
104 |
+
excluding those notices that do not pertain to any part of
|
105 |
+
the Derivative Works; and
|
106 |
+
|
107 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
108 |
+
distribution, then any Derivative Works that You distribute must
|
109 |
+
include a readable copy of the attribution notices contained
|
110 |
+
within such NOTICE file, excluding those notices that do not
|
111 |
+
pertain to any part of the Derivative Works, in at least one
|
112 |
+
of the following places: within a NOTICE text file distributed
|
113 |
+
as part of the Derivative Works; within the Source form or
|
114 |
+
documentation, if provided along with the Derivative Works; or,
|
115 |
+
within a display generated by the Derivative Works, if and
|
116 |
+
wherever such third-party notices normally appear. The contents
|
117 |
+
of the NOTICE file are for informational purposes only and
|
118 |
+
do not modify the License. You may add Your own attribution
|
119 |
+
notices within Derivative Works that You distribute, alongside
|
120 |
+
or as an addendum to the NOTICE text from the Work, provided
|
121 |
+
that such additional attribution notices cannot be construed
|
122 |
+
as modifying the License.
|
123 |
+
|
124 |
+
You may add Your own copyright statement to Your modifications and
|
125 |
+
may provide additional or different license terms and conditions
|
126 |
+
for use, reproduction, or distribution of Your modifications, or
|
127 |
+
for any such Derivative Works as a whole, provided Your use,
|
128 |
+
reproduction, and distribution of the Work otherwise complies with
|
129 |
+
the conditions stated in this License.
|
130 |
+
|
131 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
132 |
+
any Contribution intentionally submitted for inclusion in the Work
|
133 |
+
by You to the Licensor shall be under the terms and conditions of
|
134 |
+
this License, without any additional terms or conditions.
|
135 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
136 |
+
the terms of any separate license agreement you may have executed
|
137 |
+
with Licensor regarding such Contributions.
|
138 |
+
|
139 |
+
6. Trademarks. This License does not grant permission to use the trade
|
140 |
+
names, trademarks, service marks, or product names of the Licensor,
|
141 |
+
except as required for reasonable and customary use in describing the
|
142 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
143 |
+
|
144 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
145 |
+
agreed to in writing, Licensor provides the Work (and each
|
146 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
147 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
148 |
+
implied, including, without limitation, any warranties or conditions
|
149 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
150 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
151 |
+
appropriateness of using or redistributing the Work and assume any
|
152 |
+
risks associated with Your exercise of permissions under this License.
|
153 |
+
|
154 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
155 |
+
whether in tort (including negligence), contract, or otherwise,
|
156 |
+
unless required by applicable law (such as deliberate and grossly
|
157 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
158 |
+
liable to You for damages, including any direct, indirect, special,
|
159 |
+
incidental, or consequential damages of any character arising as a
|
160 |
+
result of this License or out of the use or inability to use the
|
161 |
+
Work (including but not limited to damages for loss of goodwill,
|
162 |
+
work stoppage, computer failure or malfunction, or any and all
|
163 |
+
other commercial damages or losses), even if such Contributor
|
164 |
+
has been advised of the possibility of such damages.
|
165 |
+
|
166 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
167 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
168 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
169 |
+
or other liability obligations and/or rights consistent with this
|
170 |
+
License. However, in accepting such obligations, You may act only
|
171 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
172 |
+
of any other Contributor, and only if You agree to indemnify,
|
173 |
+
defend, and hold each Contributor harmless for any liability
|
174 |
+
incurred by, or claims asserted against, such Contributor by reason
|
175 |
+
of your accepting any such warranty or additional liability.
|
176 |
+
|
177 |
+
END OF TERMS AND CONDITIONS
|
venv/lib/python3.12/site-packages/packaging-25.0.dist-info/licenses/LICENSE.BSD
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Copyright (c) Donald Stufft and individual contributors.
|
2 |
+
All rights reserved.
|
3 |
+
|
4 |
+
Redistribution and use in source and binary forms, with or without
|
5 |
+
modification, are permitted provided that the following conditions are met:
|
6 |
+
|
7 |
+
1. Redistributions of source code must retain the above copyright notice,
|
8 |
+
this list of conditions and the following disclaimer.
|
9 |
+
|
10 |
+
2. Redistributions in binary form must reproduce the above copyright
|
11 |
+
notice, this list of conditions and the following disclaimer in the
|
12 |
+
documentation and/or other materials provided with the distribution.
|
13 |
+
|
14 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
15 |
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
16 |
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
17 |
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
18 |
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
19 |
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
20 |
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
21 |
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
22 |
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
23 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
venv/lib/python3.12/site-packages/pip/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (682 Bytes). View file
|
|
venv/lib/python3.12/site-packages/pip/__pycache__/__main__.cpython-312.pyc
ADDED
Binary file (838 Bytes). View file
|
|
venv/lib/python3.12/site-packages/pip/__pycache__/__pip-runner__.cpython-312.pyc
ADDED
Binary file (2.2 kB). View file
|
|
venv/lib/python3.12/site-packages/pip/_internal/__init__.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List, Optional
|
2 |
+
|
3 |
+
from pip._internal.utils import _log
|
4 |
+
|
5 |
+
# init_logging() must be called before any call to logging.getLogger()
|
6 |
+
# which happens at import of most modules.
|
7 |
+
_log.init_logging()
|
8 |
+
|
9 |
+
|
10 |
+
def main(args: (Optional[List[str]]) = None) -> int:
|
11 |
+
"""This is preserved for old console scripts that may still be referencing
|
12 |
+
it.
|
13 |
+
|
14 |
+
For additional details, see https://github.com/pypa/pip/issues/7498.
|
15 |
+
"""
|
16 |
+
from pip._internal.utils.entrypoints import _wrapper
|
17 |
+
|
18 |
+
return _wrapper(args)
|
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (784 Bytes). View file
|
|
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/build_env.cpython-312.pyc
ADDED
Binary file (14.3 kB). View file
|
|
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/cache.cpython-312.pyc
ADDED
Binary file (12.7 kB). View file
|
|
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/configuration.cpython-312.pyc
ADDED
Binary file (17.7 kB). View file
|
|
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/exceptions.cpython-312.pyc
ADDED
Binary file (33.3 kB). View file
|
|
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/main.cpython-312.pyc
ADDED
Binary file (667 Bytes). View file
|
|
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/pyproject.cpython-312.pyc
ADDED
Binary file (4.97 kB). View file
|
|
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/self_outdated_check.cpython-312.pyc
ADDED
Binary file (10.5 kB). View file
|
|
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/wheel_builder.cpython-312.pyc
ADDED
Binary file (13.6 kB). View file
|
|
venv/lib/python3.12/site-packages/pip/_internal/build_env.py
ADDED
@@ -0,0 +1,311 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Build Environment used for isolation during sdist building
|
2 |
+
"""
|
3 |
+
|
4 |
+
import logging
|
5 |
+
import os
|
6 |
+
import pathlib
|
7 |
+
import site
|
8 |
+
import sys
|
9 |
+
import textwrap
|
10 |
+
from collections import OrderedDict
|
11 |
+
from types import TracebackType
|
12 |
+
from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple, Type, Union
|
13 |
+
|
14 |
+
from pip._vendor.certifi import where
|
15 |
+
from pip._vendor.packaging.requirements import Requirement
|
16 |
+
from pip._vendor.packaging.version import Version
|
17 |
+
|
18 |
+
from pip import __file__ as pip_location
|
19 |
+
from pip._internal.cli.spinners import open_spinner
|
20 |
+
from pip._internal.locations import get_platlib, get_purelib, get_scheme
|
21 |
+
from pip._internal.metadata import get_default_environment, get_environment
|
22 |
+
from pip._internal.utils.subprocess import call_subprocess
|
23 |
+
from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
|
24 |
+
|
25 |
+
if TYPE_CHECKING:
|
26 |
+
from pip._internal.index.package_finder import PackageFinder
|
27 |
+
|
28 |
+
logger = logging.getLogger(__name__)
|
29 |
+
|
30 |
+
|
31 |
+
def _dedup(a: str, b: str) -> Union[Tuple[str], Tuple[str, str]]:
|
32 |
+
return (a, b) if a != b else (a,)
|
33 |
+
|
34 |
+
|
35 |
+
class _Prefix:
|
36 |
+
def __init__(self, path: str) -> None:
|
37 |
+
self.path = path
|
38 |
+
self.setup = False
|
39 |
+
scheme = get_scheme("", prefix=path)
|
40 |
+
self.bin_dir = scheme.scripts
|
41 |
+
self.lib_dirs = _dedup(scheme.purelib, scheme.platlib)
|
42 |
+
|
43 |
+
|
44 |
+
def get_runnable_pip() -> str:
|
45 |
+
"""Get a file to pass to a Python executable, to run the currently-running pip.
|
46 |
+
|
47 |
+
This is used to run a pip subprocess, for installing requirements into the build
|
48 |
+
environment.
|
49 |
+
"""
|
50 |
+
source = pathlib.Path(pip_location).resolve().parent
|
51 |
+
|
52 |
+
if not source.is_dir():
|
53 |
+
# This would happen if someone is using pip from inside a zip file. In that
|
54 |
+
# case, we can use that directly.
|
55 |
+
return str(source)
|
56 |
+
|
57 |
+
return os.fsdecode(source / "__pip-runner__.py")
|
58 |
+
|
59 |
+
|
60 |
+
def _get_system_sitepackages() -> Set[str]:
|
61 |
+
"""Get system site packages
|
62 |
+
|
63 |
+
Usually from site.getsitepackages,
|
64 |
+
but fallback on `get_purelib()/get_platlib()` if unavailable
|
65 |
+
(e.g. in a virtualenv created by virtualenv<20)
|
66 |
+
|
67 |
+
Returns normalized set of strings.
|
68 |
+
"""
|
69 |
+
if hasattr(site, "getsitepackages"):
|
70 |
+
system_sites = site.getsitepackages()
|
71 |
+
else:
|
72 |
+
# virtualenv < 20 overwrites site.py without getsitepackages
|
73 |
+
# fallback on get_purelib/get_platlib.
|
74 |
+
# this is known to miss things, but shouldn't in the cases
|
75 |
+
# where getsitepackages() has been removed (inside a virtualenv)
|
76 |
+
system_sites = [get_purelib(), get_platlib()]
|
77 |
+
return {os.path.normcase(path) for path in system_sites}
|
78 |
+
|
79 |
+
|
80 |
+
class BuildEnvironment:
|
81 |
+
"""Creates and manages an isolated environment to install build deps"""
|
82 |
+
|
83 |
+
def __init__(self) -> None:
|
84 |
+
temp_dir = TempDirectory(kind=tempdir_kinds.BUILD_ENV, globally_managed=True)
|
85 |
+
|
86 |
+
self._prefixes = OrderedDict(
|
87 |
+
(name, _Prefix(os.path.join(temp_dir.path, name)))
|
88 |
+
for name in ("normal", "overlay")
|
89 |
+
)
|
90 |
+
|
91 |
+
self._bin_dirs: List[str] = []
|
92 |
+
self._lib_dirs: List[str] = []
|
93 |
+
for prefix in reversed(list(self._prefixes.values())):
|
94 |
+
self._bin_dirs.append(prefix.bin_dir)
|
95 |
+
self._lib_dirs.extend(prefix.lib_dirs)
|
96 |
+
|
97 |
+
# Customize site to:
|
98 |
+
# - ensure .pth files are honored
|
99 |
+
# - prevent access to system site packages
|
100 |
+
system_sites = _get_system_sitepackages()
|
101 |
+
|
102 |
+
self._site_dir = os.path.join(temp_dir.path, "site")
|
103 |
+
if not os.path.exists(self._site_dir):
|
104 |
+
os.mkdir(self._site_dir)
|
105 |
+
with open(
|
106 |
+
os.path.join(self._site_dir, "sitecustomize.py"), "w", encoding="utf-8"
|
107 |
+
) as fp:
|
108 |
+
fp.write(
|
109 |
+
textwrap.dedent(
|
110 |
+
"""
|
111 |
+
import os, site, sys
|
112 |
+
|
113 |
+
# First, drop system-sites related paths.
|
114 |
+
original_sys_path = sys.path[:]
|
115 |
+
known_paths = set()
|
116 |
+
for path in {system_sites!r}:
|
117 |
+
site.addsitedir(path, known_paths=known_paths)
|
118 |
+
system_paths = set(
|
119 |
+
os.path.normcase(path)
|
120 |
+
for path in sys.path[len(original_sys_path):]
|
121 |
+
)
|
122 |
+
original_sys_path = [
|
123 |
+
path for path in original_sys_path
|
124 |
+
if os.path.normcase(path) not in system_paths
|
125 |
+
]
|
126 |
+
sys.path = original_sys_path
|
127 |
+
|
128 |
+
# Second, add lib directories.
|
129 |
+
# ensuring .pth file are processed.
|
130 |
+
for path in {lib_dirs!r}:
|
131 |
+
assert not path in sys.path
|
132 |
+
site.addsitedir(path)
|
133 |
+
"""
|
134 |
+
).format(system_sites=system_sites, lib_dirs=self._lib_dirs)
|
135 |
+
)
|
136 |
+
|
137 |
+
def __enter__(self) -> None:
|
138 |
+
self._save_env = {
|
139 |
+
name: os.environ.get(name, None)
|
140 |
+
for name in ("PATH", "PYTHONNOUSERSITE", "PYTHONPATH")
|
141 |
+
}
|
142 |
+
|
143 |
+
path = self._bin_dirs[:]
|
144 |
+
old_path = self._save_env["PATH"]
|
145 |
+
if old_path:
|
146 |
+
path.extend(old_path.split(os.pathsep))
|
147 |
+
|
148 |
+
pythonpath = [self._site_dir]
|
149 |
+
|
150 |
+
os.environ.update(
|
151 |
+
{
|
152 |
+
"PATH": os.pathsep.join(path),
|
153 |
+
"PYTHONNOUSERSITE": "1",
|
154 |
+
"PYTHONPATH": os.pathsep.join(pythonpath),
|
155 |
+
}
|
156 |
+
)
|
157 |
+
|
158 |
+
def __exit__(
|
159 |
+
self,
|
160 |
+
exc_type: Optional[Type[BaseException]],
|
161 |
+
exc_val: Optional[BaseException],
|
162 |
+
exc_tb: Optional[TracebackType],
|
163 |
+
) -> None:
|
164 |
+
for varname, old_value in self._save_env.items():
|
165 |
+
if old_value is None:
|
166 |
+
os.environ.pop(varname, None)
|
167 |
+
else:
|
168 |
+
os.environ[varname] = old_value
|
169 |
+
|
170 |
+
def check_requirements(
|
171 |
+
self, reqs: Iterable[str]
|
172 |
+
) -> Tuple[Set[Tuple[str, str]], Set[str]]:
|
173 |
+
"""Return 2 sets:
|
174 |
+
- conflicting requirements: set of (installed, wanted) reqs tuples
|
175 |
+
- missing requirements: set of reqs
|
176 |
+
"""
|
177 |
+
missing = set()
|
178 |
+
conflicting = set()
|
179 |
+
if reqs:
|
180 |
+
env = (
|
181 |
+
get_environment(self._lib_dirs)
|
182 |
+
if hasattr(self, "_lib_dirs")
|
183 |
+
else get_default_environment()
|
184 |
+
)
|
185 |
+
for req_str in reqs:
|
186 |
+
req = Requirement(req_str)
|
187 |
+
# We're explicitly evaluating with an empty extra value, since build
|
188 |
+
# environments are not provided any mechanism to select specific extras.
|
189 |
+
if req.marker is not None and not req.marker.evaluate({"extra": ""}):
|
190 |
+
continue
|
191 |
+
dist = env.get_distribution(req.name)
|
192 |
+
if not dist:
|
193 |
+
missing.add(req_str)
|
194 |
+
continue
|
195 |
+
if isinstance(dist.version, Version):
|
196 |
+
installed_req_str = f"{req.name}=={dist.version}"
|
197 |
+
else:
|
198 |
+
installed_req_str = f"{req.name}==={dist.version}"
|
199 |
+
if not req.specifier.contains(dist.version, prereleases=True):
|
200 |
+
conflicting.add((installed_req_str, req_str))
|
201 |
+
# FIXME: Consider direct URL?
|
202 |
+
return conflicting, missing
|
203 |
+
|
204 |
+
def install_requirements(
|
205 |
+
self,
|
206 |
+
finder: "PackageFinder",
|
207 |
+
requirements: Iterable[str],
|
208 |
+
prefix_as_string: str,
|
209 |
+
*,
|
210 |
+
kind: str,
|
211 |
+
) -> None:
|
212 |
+
prefix = self._prefixes[prefix_as_string]
|
213 |
+
assert not prefix.setup
|
214 |
+
prefix.setup = True
|
215 |
+
if not requirements:
|
216 |
+
return
|
217 |
+
self._install_requirements(
|
218 |
+
get_runnable_pip(),
|
219 |
+
finder,
|
220 |
+
requirements,
|
221 |
+
prefix,
|
222 |
+
kind=kind,
|
223 |
+
)
|
224 |
+
|
225 |
+
@staticmethod
|
226 |
+
def _install_requirements(
|
227 |
+
pip_runnable: str,
|
228 |
+
finder: "PackageFinder",
|
229 |
+
requirements: Iterable[str],
|
230 |
+
prefix: _Prefix,
|
231 |
+
*,
|
232 |
+
kind: str,
|
233 |
+
) -> None:
|
234 |
+
args: List[str] = [
|
235 |
+
sys.executable,
|
236 |
+
pip_runnable,
|
237 |
+
"install",
|
238 |
+
"--ignore-installed",
|
239 |
+
"--no-user",
|
240 |
+
"--prefix",
|
241 |
+
prefix.path,
|
242 |
+
"--no-warn-script-location",
|
243 |
+
]
|
244 |
+
if logger.getEffectiveLevel() <= logging.DEBUG:
|
245 |
+
args.append("-v")
|
246 |
+
for format_control in ("no_binary", "only_binary"):
|
247 |
+
formats = getattr(finder.format_control, format_control)
|
248 |
+
args.extend(
|
249 |
+
(
|
250 |
+
"--" + format_control.replace("_", "-"),
|
251 |
+
",".join(sorted(formats or {":none:"})),
|
252 |
+
)
|
253 |
+
)
|
254 |
+
|
255 |
+
index_urls = finder.index_urls
|
256 |
+
if index_urls:
|
257 |
+
args.extend(["-i", index_urls[0]])
|
258 |
+
for extra_index in index_urls[1:]:
|
259 |
+
args.extend(["--extra-index-url", extra_index])
|
260 |
+
else:
|
261 |
+
args.append("--no-index")
|
262 |
+
for link in finder.find_links:
|
263 |
+
args.extend(["--find-links", link])
|
264 |
+
|
265 |
+
for host in finder.trusted_hosts:
|
266 |
+
args.extend(["--trusted-host", host])
|
267 |
+
if finder.allow_all_prereleases:
|
268 |
+
args.append("--pre")
|
269 |
+
if finder.prefer_binary:
|
270 |
+
args.append("--prefer-binary")
|
271 |
+
args.append("--")
|
272 |
+
args.extend(requirements)
|
273 |
+
extra_environ = {"_PIP_STANDALONE_CERT": where()}
|
274 |
+
with open_spinner(f"Installing {kind}") as spinner:
|
275 |
+
call_subprocess(
|
276 |
+
args,
|
277 |
+
command_desc=f"pip subprocess to install {kind}",
|
278 |
+
spinner=spinner,
|
279 |
+
extra_environ=extra_environ,
|
280 |
+
)
|
281 |
+
|
282 |
+
|
283 |
+
class NoOpBuildEnvironment(BuildEnvironment):
|
284 |
+
"""A no-op drop-in replacement for BuildEnvironment"""
|
285 |
+
|
286 |
+
def __init__(self) -> None:
|
287 |
+
pass
|
288 |
+
|
289 |
+
def __enter__(self) -> None:
|
290 |
+
pass
|
291 |
+
|
292 |
+
def __exit__(
|
293 |
+
self,
|
294 |
+
exc_type: Optional[Type[BaseException]],
|
295 |
+
exc_val: Optional[BaseException],
|
296 |
+
exc_tb: Optional[TracebackType],
|
297 |
+
) -> None:
|
298 |
+
pass
|
299 |
+
|
300 |
+
def cleanup(self) -> None:
|
301 |
+
pass
|
302 |
+
|
303 |
+
def install_requirements(
|
304 |
+
self,
|
305 |
+
finder: "PackageFinder",
|
306 |
+
requirements: Iterable[str],
|
307 |
+
prefix_as_string: str,
|
308 |
+
*,
|
309 |
+
kind: str,
|
310 |
+
) -> None:
|
311 |
+
raise NotImplementedError()
|