Akash6776 commited on
Commit
8d45501
·
verified ·
1 Parent(s): 1db8a6e

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +3 -0
  2. 17.CherryPimps.Give Me That Cock LIVE.mp4 +3 -0
  3. 17.Clips4Sale.Family Therapy.Let Daddy Help.mp4 +3 -0
  4. 17.RealityKings.Teaching Her To Suck Cock.mp4 +3 -0
  5. venv/lib/python3.12/site-packages/certifi/__pycache__/__init__.cpython-312.pyc +0 -0
  6. venv/lib/python3.12/site-packages/certifi/__pycache__/__main__.cpython-312.pyc +0 -0
  7. venv/lib/python3.12/site-packages/certifi/__pycache__/core.cpython-312.pyc +0 -0
  8. venv/lib/python3.12/site-packages/filelock/__pycache__/__init__.cpython-312.pyc +0 -0
  9. venv/lib/python3.12/site-packages/filelock/__pycache__/_api.cpython-312.pyc +0 -0
  10. venv/lib/python3.12/site-packages/filelock/__pycache__/_error.cpython-312.pyc +0 -0
  11. venv/lib/python3.12/site-packages/filelock/__pycache__/_soft.cpython-312.pyc +0 -0
  12. venv/lib/python3.12/site-packages/filelock/__pycache__/_util.cpython-312.pyc +0 -0
  13. venv/lib/python3.12/site-packages/filelock/__pycache__/_windows.cpython-312.pyc +0 -0
  14. venv/lib/python3.12/site-packages/filelock/__pycache__/asyncio.cpython-312.pyc +0 -0
  15. venv/lib/python3.12/site-packages/filelock/__pycache__/version.cpython-312.pyc +0 -0
  16. venv/lib/python3.12/site-packages/hf_xet-1.1.5.dist-info/licenses/LICENSE +201 -0
  17. venv/lib/python3.12/site-packages/huggingface_hub/utils/_cache_assets.py +135 -0
  18. venv/lib/python3.12/site-packages/huggingface_hub/utils/_cache_manager.py +896 -0
  19. venv/lib/python3.12/site-packages/huggingface_hub/utils/_datetime.py +67 -0
  20. venv/lib/python3.12/site-packages/huggingface_hub/utils/_deprecation.py +136 -0
  21. venv/lib/python3.12/site-packages/huggingface_hub/utils/_experimental.py +68 -0
  22. venv/lib/python3.12/site-packages/huggingface_hub/utils/_fixes.py +133 -0
  23. venv/lib/python3.12/site-packages/huggingface_hub/utils/_http.py +637 -0
  24. venv/lib/python3.12/site-packages/huggingface_hub/utils/_pagination.py +52 -0
  25. venv/lib/python3.12/site-packages/huggingface_hub/utils/_paths.py +141 -0
  26. venv/lib/python3.12/site-packages/huggingface_hub/utils/_safetensors.py +111 -0
  27. venv/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py +226 -0
  28. venv/lib/python3.12/site-packages/huggingface_hub/utils/_xet.py +188 -0
  29. venv/lib/python3.12/site-packages/huggingface_hub/utils/insecure_hashlib.py +38 -0
  30. venv/lib/python3.12/site-packages/huggingface_hub/utils/logging.py +188 -0
  31. venv/lib/python3.12/site-packages/huggingface_hub/utils/sha.py +64 -0
  32. venv/lib/python3.12/site-packages/idna/__pycache__/core.cpython-312.pyc +0 -0
  33. venv/lib/python3.12/site-packages/idna/__pycache__/idnadata.cpython-312.pyc +0 -0
  34. venv/lib/python3.12/site-packages/packaging-25.0.dist-info/licenses/LICENSE +3 -0
  35. venv/lib/python3.12/site-packages/packaging-25.0.dist-info/licenses/LICENSE.APACHE +177 -0
  36. venv/lib/python3.12/site-packages/packaging-25.0.dist-info/licenses/LICENSE.BSD +23 -0
  37. venv/lib/python3.12/site-packages/pip/__pycache__/__init__.cpython-312.pyc +0 -0
  38. venv/lib/python3.12/site-packages/pip/__pycache__/__main__.cpython-312.pyc +0 -0
  39. venv/lib/python3.12/site-packages/pip/__pycache__/__pip-runner__.cpython-312.pyc +0 -0
  40. venv/lib/python3.12/site-packages/pip/_internal/__init__.py +18 -0
  41. venv/lib/python3.12/site-packages/pip/_internal/__pycache__/__init__.cpython-312.pyc +0 -0
  42. venv/lib/python3.12/site-packages/pip/_internal/__pycache__/build_env.cpython-312.pyc +0 -0
  43. venv/lib/python3.12/site-packages/pip/_internal/__pycache__/cache.cpython-312.pyc +0 -0
  44. venv/lib/python3.12/site-packages/pip/_internal/__pycache__/configuration.cpython-312.pyc +0 -0
  45. venv/lib/python3.12/site-packages/pip/_internal/__pycache__/exceptions.cpython-312.pyc +0 -0
  46. venv/lib/python3.12/site-packages/pip/_internal/__pycache__/main.cpython-312.pyc +0 -0
  47. venv/lib/python3.12/site-packages/pip/_internal/__pycache__/pyproject.cpython-312.pyc +0 -0
  48. venv/lib/python3.12/site-packages/pip/_internal/__pycache__/self_outdated_check.cpython-312.pyc +0 -0
  49. venv/lib/python3.12/site-packages/pip/_internal/__pycache__/wheel_builder.cpython-312.pyc +0 -0
  50. venv/lib/python3.12/site-packages/pip/_internal/build_env.py +311 -0
.gitattributes CHANGED
@@ -136,3 +136,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
136
  19.Deeper.Dare[[:space:]]You.101231.mp4 filter=lfs diff=lfs merge=lfs -text
137
  17.Mofos.Spinner[[:space:]]Sucks[[:space:]]Cock[[:space:]]for[[:space:]]Fame.mp4 filter=lfs diff=lfs merge=lfs -text
138
  18.Mofos.A[[:space:]]Sneaky[[:space:]]Threesome[[:space:]]Situation.mp4 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
136
  19.Deeper.Dare[[:space:]]You.101231.mp4 filter=lfs diff=lfs merge=lfs -text
137
  17.Mofos.Spinner[[:space:]]Sucks[[:space:]]Cock[[:space:]]for[[:space:]]Fame.mp4 filter=lfs diff=lfs merge=lfs -text
138
  18.Mofos.A[[:space:]]Sneaky[[:space:]]Threesome[[:space:]]Situation.mp4 filter=lfs diff=lfs merge=lfs -text
139
+ 17.Clips4Sale.Family[[:space:]]Therapy.Let[[:space:]]Daddy[[:space:]]Help.mp4 filter=lfs diff=lfs merge=lfs -text
140
+ 17.CherryPimps.Give[[:space:]]Me[[:space:]]That[[:space:]]Cock[[:space:]]LIVE.mp4 filter=lfs diff=lfs merge=lfs -text
141
+ 17.RealityKings.Teaching[[:space:]]Her[[:space:]]To[[:space:]]Suck[[:space:]]Cock.mp4 filter=lfs diff=lfs merge=lfs -text
17.CherryPimps.Give Me That Cock LIVE.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0801b315fee505e3bf8263b012cdbab11bb8eae47f3a921b6695951cbeab4fa7
3
+ size 9009317124
17.Clips4Sale.Family Therapy.Let Daddy Help.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17607983880ebc83ade9468ffa6a25940422058756d187cc60b13908d29cd67a
3
+ size 159946715
17.RealityKings.Teaching Her To Suck Cock.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:752a736937fc5e9bee0a332ed57170cbf4aa42462d587e57d39510388cc98dde
3
+ size 2386235157
venv/lib/python3.12/site-packages/certifi/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (304 Bytes). View file
 
venv/lib/python3.12/site-packages/certifi/__pycache__/__main__.cpython-312.pyc ADDED
Binary file (619 Bytes). View file
 
venv/lib/python3.12/site-packages/certifi/__pycache__/core.cpython-312.pyc ADDED
Binary file (2.05 kB). View file
 
venv/lib/python3.12/site-packages/filelock/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.57 kB). View file
 
venv/lib/python3.12/site-packages/filelock/__pycache__/_api.cpython-312.pyc ADDED
Binary file (16.6 kB). View file
 
venv/lib/python3.12/site-packages/filelock/__pycache__/_error.cpython-312.pyc ADDED
Binary file (1.75 kB). View file
 
venv/lib/python3.12/site-packages/filelock/__pycache__/_soft.cpython-312.pyc ADDED
Binary file (2.46 kB). View file
 
venv/lib/python3.12/site-packages/filelock/__pycache__/_util.cpython-312.pyc ADDED
Binary file (2 kB). View file
 
venv/lib/python3.12/site-packages/filelock/__pycache__/_windows.cpython-312.pyc ADDED
Binary file (3.26 kB). View file
 
venv/lib/python3.12/site-packages/filelock/__pycache__/asyncio.cpython-312.pyc ADDED
Binary file (15.6 kB). View file
 
venv/lib/python3.12/site-packages/filelock/__pycache__/version.cpython-312.pyc ADDED
Binary file (639 Bytes). View file
 
venv/lib/python3.12/site-packages/hf_xet-1.1.5.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
venv/lib/python3.12/site-packages/huggingface_hub/utils/_cache_assets.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2019-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ from pathlib import Path
16
+ from typing import Union
17
+
18
+ from ..constants import HF_ASSETS_CACHE
19
+
20
+
21
+ def cached_assets_path(
22
+ library_name: str,
23
+ namespace: str = "default",
24
+ subfolder: str = "default",
25
+ *,
26
+ assets_dir: Union[str, Path, None] = None,
27
+ ):
28
+ """Return a folder path to cache arbitrary files.
29
+
30
+ `huggingface_hub` provides a canonical folder path to store assets. This is the
31
+ recommended way to integrate cache in a downstream library as it will benefit from
32
+ the builtins tools to scan and delete the cache properly.
33
+
34
+ The distinction is made between files cached from the Hub and assets. Files from the
35
+ Hub are cached in a git-aware manner and entirely managed by `huggingface_hub`. See
36
+ [related documentation](https://huggingface.co/docs/huggingface_hub/how-to-cache).
37
+ All other files that a downstream library caches are considered to be "assets"
38
+ (files downloaded from external sources, extracted from a .tar archive, preprocessed
39
+ for training,...).
40
+
41
+ Once the folder path is generated, it is guaranteed to exist and to be a directory.
42
+ The path is based on 3 levels of depth: the library name, a namespace and a
43
+ subfolder. Those 3 levels grants flexibility while allowing `huggingface_hub` to
44
+ expect folders when scanning/deleting parts of the assets cache. Within a library,
45
+ it is expected that all namespaces share the same subset of subfolder names but this
46
+ is not a mandatory rule. The downstream library has then full control on which file
47
+ structure to adopt within its cache. Namespace and subfolder are optional (would
48
+ default to a `"default/"` subfolder) but library name is mandatory as we want every
49
+ downstream library to manage its own cache.
50
+
51
+ Expected tree:
52
+ ```text
53
+ assets/
54
+ └── datasets/
55
+ │ ├── SQuAD/
56
+ │ │ ├── downloaded/
57
+ │ │ ├── extracted/
58
+ │ │ └── processed/
59
+ │ ├── Helsinki-NLP--tatoeba_mt/
60
+ │ ├── downloaded/
61
+ │ ├── extracted/
62
+ │ └── processed/
63
+ └── transformers/
64
+ ├── default/
65
+ │ ├── something/
66
+ ├── bert-base-cased/
67
+ │ ├── default/
68
+ │ └── training/
69
+ hub/
70
+ └── models--julien-c--EsperBERTo-small/
71
+ ├── blobs/
72
+ │ ├── (...)
73
+ │ ├── (...)
74
+ ├── refs/
75
+ │ └── (...)
76
+ └── [ 128] snapshots/
77
+ ├── 2439f60ef33a0d46d85da5001d52aeda5b00ce9f/
78
+ │ ├── (...)
79
+ └── bbc77c8132af1cc5cf678da3f1ddf2de43606d48/
80
+ └── (...)
81
+ ```
82
+
83
+
84
+ Args:
85
+ library_name (`str`):
86
+ Name of the library that will manage the cache folder. Example: `"dataset"`.
87
+ namespace (`str`, *optional*, defaults to "default"):
88
+ Namespace to which the data belongs. Example: `"SQuAD"`.
89
+ subfolder (`str`, *optional*, defaults to "default"):
90
+ Subfolder in which the data will be stored. Example: `extracted`.
91
+ assets_dir (`str`, `Path`, *optional*):
92
+ Path to the folder where assets are cached. This must not be the same folder
93
+ where Hub files are cached. Defaults to `HF_HOME / "assets"` if not provided.
94
+ Can also be set with `HF_ASSETS_CACHE` environment variable.
95
+
96
+ Returns:
97
+ Path to the cache folder (`Path`).
98
+
99
+ Example:
100
+ ```py
101
+ >>> from huggingface_hub import cached_assets_path
102
+
103
+ >>> cached_assets_path(library_name="datasets", namespace="SQuAD", subfolder="download")
104
+ PosixPath('/home/wauplin/.cache/huggingface/extra/datasets/SQuAD/download')
105
+
106
+ >>> cached_assets_path(library_name="datasets", namespace="SQuAD", subfolder="extracted")
107
+ PosixPath('/home/wauplin/.cache/huggingface/extra/datasets/SQuAD/extracted')
108
+
109
+ >>> cached_assets_path(library_name="datasets", namespace="Helsinki-NLP/tatoeba_mt")
110
+ PosixPath('/home/wauplin/.cache/huggingface/extra/datasets/Helsinki-NLP--tatoeba_mt/default')
111
+
112
+ >>> cached_assets_path(library_name="datasets", assets_dir="/tmp/tmp123456")
113
+ PosixPath('/tmp/tmp123456/datasets/default/default')
114
+ ```
115
+ """
116
+ # Resolve assets_dir
117
+ if assets_dir is None:
118
+ assets_dir = HF_ASSETS_CACHE
119
+ assets_dir = Path(assets_dir).expanduser().resolve()
120
+
121
+ # Avoid names that could create path issues
122
+ for part in (" ", "/", "\\"):
123
+ library_name = library_name.replace(part, "--")
124
+ namespace = namespace.replace(part, "--")
125
+ subfolder = subfolder.replace(part, "--")
126
+
127
+ # Path to subfolder is created
128
+ path = assets_dir / library_name / namespace / subfolder
129
+ try:
130
+ path.mkdir(exist_ok=True, parents=True)
131
+ except (FileExistsError, NotADirectoryError):
132
+ raise ValueError(f"Corrupted assets folder: cannot create directory because of an existing file ({path}).")
133
+
134
+ # Return
135
+ return path
venv/lib/python3.12/site-packages/huggingface_hub/utils/_cache_manager.py ADDED
@@ -0,0 +1,896 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to manage the HF cache directory."""
16
+
17
+ import os
18
+ import shutil
19
+ import time
20
+ from collections import defaultdict
21
+ from dataclasses import dataclass
22
+ from pathlib import Path
23
+ from typing import Dict, FrozenSet, List, Literal, Optional, Set, Union
24
+
25
+ from huggingface_hub.errors import CacheNotFound, CorruptedCacheException
26
+
27
+ from ..commands._cli_utils import tabulate
28
+ from ..constants import HF_HUB_CACHE
29
+ from . import logging
30
+
31
+
32
+ logger = logging.get_logger(__name__)
33
+
34
+ REPO_TYPE_T = Literal["model", "dataset", "space"]
35
+
36
+ # List of OS-created helper files that need to be ignored
37
+ FILES_TO_IGNORE = [".DS_Store"]
38
+
39
+
40
+ @dataclass(frozen=True)
41
+ class CachedFileInfo:
42
+ """Frozen data structure holding information about a single cached file.
43
+
44
+ Args:
45
+ file_name (`str`):
46
+ Name of the file. Example: `config.json`.
47
+ file_path (`Path`):
48
+ Path of the file in the `snapshots` directory. The file path is a symlink
49
+ referring to a blob in the `blobs` folder.
50
+ blob_path (`Path`):
51
+ Path of the blob file. This is equivalent to `file_path.resolve()`.
52
+ size_on_disk (`int`):
53
+ Size of the blob file in bytes.
54
+ blob_last_accessed (`float`):
55
+ Timestamp of the last time the blob file has been accessed (from any
56
+ revision).
57
+ blob_last_modified (`float`):
58
+ Timestamp of the last time the blob file has been modified/created.
59
+
60
+ <Tip warning={true}>
61
+
62
+ `blob_last_accessed` and `blob_last_modified` reliability can depend on the OS you
63
+ are using. See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
64
+ for more details.
65
+
66
+ </Tip>
67
+ """
68
+
69
+ file_name: str
70
+ file_path: Path
71
+ blob_path: Path
72
+ size_on_disk: int
73
+
74
+ blob_last_accessed: float
75
+ blob_last_modified: float
76
+
77
+ @property
78
+ def blob_last_accessed_str(self) -> str:
79
+ """
80
+ (property) Timestamp of the last time the blob file has been accessed (from any
81
+ revision), returned as a human-readable string.
82
+
83
+ Example: "2 weeks ago".
84
+ """
85
+ return _format_timesince(self.blob_last_accessed)
86
+
87
+ @property
88
+ def blob_last_modified_str(self) -> str:
89
+ """
90
+ (property) Timestamp of the last time the blob file has been modified, returned
91
+ as a human-readable string.
92
+
93
+ Example: "2 weeks ago".
94
+ """
95
+ return _format_timesince(self.blob_last_modified)
96
+
97
+ @property
98
+ def size_on_disk_str(self) -> str:
99
+ """
100
+ (property) Size of the blob file as a human-readable string.
101
+
102
+ Example: "42.2K".
103
+ """
104
+ return _format_size(self.size_on_disk)
105
+
106
+
107
@dataclass(frozen=True)
class CachedRevisionInfo:
    """Frozen data structure holding information about a single cached revision.

    A revision corresponds to a directory inside the `snapshots` folder. It mirrors
    the exact tree structure of the repo on the Hub but contains only symlinks to
    blob files. A revision is either pointed to by 1 or more `refs`, or "detached"
    (no refs at all).

    Args:
        commit_hash (`str`):
            Hash of the revision (unique).
            Example: `"9338f7b671827df886678df2bdd7cc7b4f36dffd"`.
        snapshot_path (`Path`):
            Path to the revision directory in the `snapshots` folder. It contains the
            exact tree structure as the repo on the Hub.
        size_on_disk (`int`):
            Sum of the blob file sizes that are symlink-ed by the revision.
        files: (`FrozenSet[CachedFileInfo]`):
            Set of [`~CachedFileInfo`] describing all files contained in the snapshot.
        refs (`FrozenSet[str]`):
            Set of `refs` pointing to this revision. If the revision has no `refs`, it
            is considered detached.
            Example: `{"main", "2.4.0"}` or `{"refs/pr/1"}`.
        last_modified (`float`):
            Timestamp of the last time the revision has been created/modified.

    <Tip warning={true}>

    `last_accessed` cannot be determined correctly on a single revision as blob files
    are shared across revisions.

    </Tip>

    <Tip warning={true}>

    `size_on_disk` is not necessarily the sum of all file sizes because of possible
    duplicated files. Besides, only blobs are taken into account, not the (negligible)
    size of folders and symlinks.

    </Tip>
    """

    # NOTE: field order defines the positional signature of the generated __init__
    # and must not be changed.
    commit_hash: str
    snapshot_path: Path
    size_on_disk: int
    files: FrozenSet[CachedFileInfo]
    refs: FrozenSet[str]

    last_modified: float

    @property
    def last_modified_str(self) -> str:
        """
        (property) Human-readable version of `last_modified`, relative to now.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.last_modified)

    @property
    def size_on_disk_str(self) -> str:
        """
        (property) Human-readable version of `size_on_disk`.

        Example: "42.2K".
        """
        return _format_size(self.size_on_disk)

    @property
    def nb_files(self) -> int:
        """
        (property) Total number of files in the revision.
        """
        return len(self.files)
182
+
183
+
184
@dataclass(frozen=True)
class CachedRepoInfo:
    """Frozen data structure holding information about a cached repository.

    Args:
        repo_id (`str`):
            Repo id of the repo on the Hub. Example: `"google/fleurs"`.
        repo_type (`Literal["dataset", "model", "space"]`):
            Type of the cached repo.
        repo_path (`Path`):
            Local path to the cached repo.
        size_on_disk (`int`):
            Sum of the blob file sizes in the cached repo.
        nb_files (`int`):
            Total number of blob files in the cached repo.
        revisions (`FrozenSet[CachedRevisionInfo]`):
            Set of [`~CachedRevisionInfo`] describing all revisions cached in the repo.
        last_accessed (`float`):
            Timestamp of the last time a blob file of the repo has been accessed.
        last_modified (`float`):
            Timestamp of the last time a blob file of the repo has been modified/created.

    <Tip warning={true}>

    `size_on_disk` is not necessarily the sum of all revisions sizes because of
    duplicated files. Besides, only blobs are taken into account, not the (negligible)
    size of folders and symlinks.

    </Tip>

    <Tip warning={true}>

    `last_accessed` and `last_modified` reliability can depend on the OS you are using.
    See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
    for more details.

    </Tip>
    """

    # NOTE: field order defines the positional signature of the generated __init__
    # and must not be changed.
    repo_id: str
    repo_type: REPO_TYPE_T
    repo_path: Path
    size_on_disk: int
    nb_files: int
    revisions: FrozenSet[CachedRevisionInfo]

    last_accessed: float
    last_modified: float

    @property
    def last_accessed_str(self) -> str:
        """
        (property) Human-readable version of `last_accessed`, relative to now.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.last_accessed)

    @property
    def last_modified_str(self) -> str:
        """
        (property) Human-readable version of `last_modified`, relative to now.

        Example: "2 weeks ago".
        """
        return _format_timesince(self.last_modified)

    @property
    def size_on_disk_str(self) -> str:
        """
        (property) Human-readable version of `size_on_disk`.

        Example: "42.2K".
        """
        return _format_size(self.size_on_disk)

    @property
    def refs(self) -> Dict[str, CachedRevisionInfo]:
        """
        (property) Mapping between `refs` and revision data structures.
        """
        # Each ref points to exactly one revision; build the lookup table explicitly.
        ref_to_revision: Dict[str, CachedRevisionInfo] = {}
        for revision in self.revisions:
            for ref_name in revision.refs:
                ref_to_revision[ref_name] = revision
        return ref_to_revision
268
+
269
+
270
@dataclass(frozen=True)
class DeleteCacheStrategy:
    """Frozen data structure holding the strategy to delete cached revisions.

    This object is not meant to be instantiated programmatically but to be returned by
    [`~utils.HFCacheInfo.delete_revisions`]. See documentation for usage example.

    Args:
        expected_freed_size (`float`):
            Expected freed size once strategy is executed.
        blobs (`FrozenSet[Path]`):
            Set of blob file paths to be deleted.
        refs (`FrozenSet[Path]`):
            Set of reference file paths to be deleted.
        repos (`FrozenSet[Path]`):
            Set of entire repo paths to be deleted.
        snapshots (`FrozenSet[Path]`):
            Set of snapshots to be deleted (directory of symlinks).
    """

    expected_freed_size: int
    blobs: FrozenSet[Path]
    refs: FrozenSet[Path]
    repos: FrozenSet[Path]
    snapshots: FrozenSet[Path]

    @property
    def expected_freed_size_str(self) -> str:
        """
        (property) Human-readable version of `expected_freed_size`.

        Example: "42.2K".
        """
        return _format_size(self.expected_freed_size)

    def execute(self) -> None:
        """Execute the defined strategy.

        <Tip warning={true}>

        If this method is interrupted, the cache might get corrupted. Deletion order is
        implemented so that references and symlinks are deleted before the actual blob
        files.

        </Tip>

        <Tip warning={true}>

        This method is irreversible. If executed, cached files are erased and must be
        downloaded again.

        </Tip>
        """
        # Deletion order matters: blobs are deleted last so that the user can't end
        # up in a state where a `ref` refers to a missing snapshot or a snapshot
        # symlink refers to a deleted blob. Do NOT reorder the tuples below.
        deletion_plan = (
            ("repo", self.repos),
            ("snapshot", self.snapshots),
            ("ref", self.refs),
            ("blob", self.blobs),
        )
        for path_type, paths in deletion_plan:
            for path in paths:
                _try_delete_path(path, path_type=path_type)

        logger.info(f"Cache deletion done. Saved {self.expected_freed_size_str}.")
344
+
345
+
346
@dataclass(frozen=True)
class HFCacheInfo:
    """Frozen data structure holding information about the entire cache-system.

    This data structure is returned by [`scan_cache_dir`] and is immutable.

    Args:
        size_on_disk (`int`):
            Sum of all valid repo sizes in the cache-system.
        repos (`FrozenSet[CachedRepoInfo]`):
            Set of [`~CachedRepoInfo`] describing all valid cached repos found on the
            cache-system while scanning.
        warnings (`List[CorruptedCacheException]`):
            List of [`~CorruptedCacheException`] that occurred while scanning the cache.
            Those exceptions are captured so that the scan can continue. Corrupted repos
            are skipped from the scan.

    <Tip warning={true}>

    Here `size_on_disk` is equal to the sum of all repo sizes (only blobs). However if
    some cached repos are corrupted, their sizes are not taken into account.

    </Tip>
    """

    size_on_disk: int
    repos: FrozenSet[CachedRepoInfo]
    warnings: List[CorruptedCacheException]

    @property
    def size_on_disk_str(self) -> str:
        """
        (property) Sum of all valid repo sizes in the cache-system as a human-readable
        string.

        Example: "42.2K".
        """
        return _format_size(self.size_on_disk)

    def delete_revisions(self, *revisions: str) -> DeleteCacheStrategy:
        """Prepare the strategy to delete one or more revisions cached locally.

        Input revisions can be any revision hash. If a revision hash is not found in the
        local cache, a warning is thrown but no error is raised. Revisions can be from
        different cached repos since hashes are unique across repos,

        Examples:
        ```py
        >>> from huggingface_hub import scan_cache_dir
        >>> cache_info = scan_cache_dir()
        >>> delete_strategy = cache_info.delete_revisions(
        ...     "81fd1d6e7847c99f5862c9fb81387956d99ec7aa"
        ... )
        >>> print(f"Will free {delete_strategy.expected_freed_size_str}.")
        Will free 7.9K.
        >>> delete_strategy.execute()
        Cache deletion done. Saved 7.9K.
        ```

        ```py
        >>> from huggingface_hub import scan_cache_dir
        >>> scan_cache_dir().delete_revisions(
        ...     "81fd1d6e7847c99f5862c9fb81387956d99ec7aa",
        ...     "e2983b237dccf3ab4937c97fa717319a9ca1a96d",
        ...     "6c0e6080953db56375760c0471a8c5f2929baf11",
        ... ).execute()
        Cache deletion done. Saved 8.6G.
        ```

        <Tip warning={true}>

        `delete_revisions` returns a [`~utils.DeleteCacheStrategy`] object that needs to
        be executed. The [`~utils.DeleteCacheStrategy`] is not meant to be modified but
        allows having a dry run before actually executing the deletion.

        </Tip>
        """
        hashes_to_delete: Set[str] = set(revisions)

        # Group the requested revisions by the repo that owns them. Hashes are unique
        # across repos, so each requested hash matches at most one (repo, revision) pair.
        repos_with_revisions: Dict[CachedRepoInfo, Set[CachedRevisionInfo]] = defaultdict(set)

        for repo in self.repos:
            for revision in repo.revisions:
                if revision.commit_hash in hashes_to_delete:
                    repos_with_revisions[repo].add(revision)
                    hashes_to_delete.remove(revision.commit_hash)

        # Any hash left over was not found in the cache: warn but don't fail.
        if len(hashes_to_delete) > 0:
            logger.warning(f"Revision(s) not found - cannot delete them: {', '.join(hashes_to_delete)}")

        delete_strategy_blobs: Set[Path] = set()
        delete_strategy_refs: Set[Path] = set()
        delete_strategy_repos: Set[Path] = set()
        delete_strategy_snapshots: Set[Path] = set()
        delete_strategy_expected_freed_size = 0

        for affected_repo, revisions_to_delete in repos_with_revisions.items():
            other_revisions = affected_repo.revisions - revisions_to_delete

            # If no other revisions, it means all revisions are deleted
            # -> delete the entire cached repo
            if len(other_revisions) == 0:
                delete_strategy_repos.add(affected_repo.repo_path)
                delete_strategy_expected_freed_size += affected_repo.size_on_disk
                continue

            # Some revisions of the repo will be deleted but not all. We need to filter
            # which blob files will not be linked anymore.
            for revision_to_delete in revisions_to_delete:
                # Snapshot dir
                delete_strategy_snapshots.add(revision_to_delete.snapshot_path)

                # Refs dir
                for ref in revision_to_delete.refs:
                    delete_strategy_refs.add(affected_repo.repo_path / "refs" / ref)

                # Blobs dir
                for file in revision_to_delete.files:
                    if file.blob_path not in delete_strategy_blobs:
                        # Blobs are shared across revisions: only delete a blob if no
                        # remaining revision still symlinks to it.
                        is_file_alone = True
                        for revision in other_revisions:
                            for rev_file in revision.files:
                                if file.blob_path == rev_file.blob_path:
                                    is_file_alone = False
                                    break
                            if not is_file_alone:
                                break

                        # Blob file not referenced by remaining revisions -> delete
                        if is_file_alone:
                            delete_strategy_blobs.add(file.blob_path)
                            delete_strategy_expected_freed_size += file.size_on_disk

        # Return the strategy instead of executing it.
        return DeleteCacheStrategy(
            blobs=frozenset(delete_strategy_blobs),
            refs=frozenset(delete_strategy_refs),
            repos=frozenset(delete_strategy_repos),
            snapshots=frozenset(delete_strategy_snapshots),
            expected_freed_size=delete_strategy_expected_freed_size,
        )

    def export_as_table(self, *, verbosity: int = 0) -> str:
        """Generate a table from the [`HFCacheInfo`] object.

        Pass `verbosity=0` to get a table with a single row per repo, with columns
        "repo_id", "repo_type", "size_on_disk", "nb_files", "last_accessed", "last_modified", "refs", "local_path".

        Pass `verbosity=1` to get a table with a row per repo and revision (thus multiple rows can appear for a single repo), with columns
        "repo_id", "repo_type", "revision", "size_on_disk", "nb_files", "last_modified", "refs", "local_path".

        Example:
        ```py
        >>> from huggingface_hub.utils import scan_cache_dir

        >>> hf_cache_info = scan_cache_dir()
        HFCacheInfo(...)

        >>> print(hf_cache_info.export_as_table())
        REPO ID                                             REPO TYPE SIZE ON DISK NB FILES LAST_ACCESSED LAST_MODIFIED REFS LOCAL PATH
        --------------------------------------------------- --------- ------------ -------- ------------- ------------- ---- --------------------------------------------------------------------------------------------------
        roberta-base                                        model             2.7M        5 1 day ago     1 week ago    main ~/.cache/huggingface/hub/models--roberta-base
        suno/bark                                           model             8.8K        1 1 week ago    1 week ago    main ~/.cache/huggingface/hub/models--suno--bark
        t5-base                                             model           893.8M        4 4 days ago    7 months ago  main ~/.cache/huggingface/hub/models--t5-base
        t5-large                                            model             3.0G        4 5 weeks ago   5 months ago  main ~/.cache/huggingface/hub/models--t5-large

        >>> print(hf_cache_info.export_as_table(verbosity=1))
        REPO ID                                             REPO TYPE REVISION                                 SIZE ON DISK NB FILES LAST_MODIFIED REFS LOCAL PATH
        --------------------------------------------------- --------- ---------------------------------------- ------------ -------- ------------- ---- -----------------------------------------------------------------------------------------------------------------------------------------------------
        roberta-base                                        model     e2da8e2f811d1448a5b465c236feacd80ffbac7b         2.7M        5 1 week ago    main ~/.cache/huggingface/hub/models--roberta-base/snapshots/e2da8e2f811d1448a5b465c236feacd80ffbac7b
        suno/bark                                           model     70a8a7d34168586dc5d028fa9666aceade177992         8.8K        1 1 week ago    main ~/.cache/huggingface/hub/models--suno--bark/snapshots/70a8a7d34168586dc5d028fa9666aceade177992
        t5-base                                             model     a9723ea7f1b39c1eae772870f3b547bf6ef7e6c1       893.8M        4 7 months ago  main ~/.cache/huggingface/hub/models--t5-base/snapshots/a9723ea7f1b39c1eae772870f3b547bf6ef7e6c1
        t5-large                                            model     150ebc2c4b72291e770f58e6057481c8d2ed331a         3.0G        4 5 months ago  main ~/.cache/huggingface/hub/models--t5-large/snapshots/150ebc2c4b72291e770f58e6057481c8d2ed331a
        ```

        Args:
            verbosity (`int`, *optional*):
                The verbosity level. Defaults to 0.

        Returns:
            `str`: The table as a string.
        """
        if verbosity == 0:
            # One row per repo, sorted by local path for a stable output.
            return tabulate(
                rows=[
                    [
                        repo.repo_id,
                        repo.repo_type,
                        "{:>12}".format(repo.size_on_disk_str),
                        repo.nb_files,
                        repo.last_accessed_str,
                        repo.last_modified_str,
                        ", ".join(sorted(repo.refs)),
                        str(repo.repo_path),
                    ]
                    for repo in sorted(self.repos, key=lambda repo: repo.repo_path)
                ],
                headers=[
                    "REPO ID",
                    "REPO TYPE",
                    "SIZE ON DISK",
                    "NB FILES",
                    "LAST_ACCESSED",
                    "LAST_MODIFIED",
                    "REFS",
                    "LOCAL PATH",
                ],
            )
        else:
            # One row per (repo, revision) pair; revisions sorted by commit hash.
            return tabulate(
                rows=[
                    [
                        repo.repo_id,
                        repo.repo_type,
                        revision.commit_hash,
                        "{:>12}".format(revision.size_on_disk_str),
                        revision.nb_files,
                        revision.last_modified_str,
                        ", ".join(sorted(revision.refs)),
                        str(revision.snapshot_path),
                    ]
                    for repo in sorted(self.repos, key=lambda repo: repo.repo_path)
                    for revision in sorted(repo.revisions, key=lambda revision: revision.commit_hash)
                ],
                headers=[
                    "REPO ID",
                    "REPO TYPE",
                    "REVISION",
                    "SIZE ON DISK",
                    "NB FILES",
                    "LAST_MODIFIED",
                    "REFS",
                    "LOCAL PATH",
                ],
            )
581
+
582
+
583
def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
    """Scan the entire HF cache-system and return a [`~HFCacheInfo`] structure.

    The cache is scanned repo by repo. If a repo is corrupted, a
    [`~CorruptedCacheException`] is raised internally but captured and returned in the
    [`~HFCacheInfo`] structure, so the scan continues. Only valid repos get a proper
    report. A detailed report can also be printed from the CLI with
    `huggingface-cli scan-cache`.

    Example:
    ```py
    >>> from huggingface_hub import scan_cache_dir

    >>> hf_cache_info = scan_cache_dir()
    HFCacheInfo(size_on_disk=3398085269, repos=frozenset({...}), warnings=[...])
    ```

    Args:
        cache_dir (`str` or `Path`, `optional`):
            Cache directory to cache. Defaults to the default HF cache directory.

    <Tip warning={true}>

    Raises:

        `CacheNotFound`
            If the cache directory does not exist.

        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the cache directory is a file, instead of a directory.

    </Tip>

    Returns: a [`~HFCacheInfo`] object.
    """
    # Fall back on the default HF cache location when no directory is given.
    cache_path = Path(HF_HUB_CACHE if cache_dir is None else cache_dir).expanduser().resolve()

    if not cache_path.exists():
        raise CacheNotFound(
            f"Cache directory not found: {cache_path}. Please use `cache_dir` argument or set `HF_HUB_CACHE` environment variable.",
            cache_dir=cache_path,
        )
    if cache_path.is_file():
        raise ValueError(
            f"Scan cache expects a directory but found a file: {cache_path}. Please use `cache_dir` argument or set `HF_HUB_CACHE` environment variable."
        )

    scanned_repos: Set[CachedRepoInfo] = set()
    scan_warnings: List[CorruptedCacheException] = []
    for entry in cache_path.iterdir():
        # './.locks/' holds download locks, not a cached repo.
        if entry.name == ".locks":
            continue
        try:
            scanned_repos.add(_scan_cached_repo(entry))
        except CorruptedCacheException as exc:
            # Capture corruption errors so a single bad repo doesn't abort the scan.
            scan_warnings.append(exc)

    return HFCacheInfo(
        repos=frozenset(scanned_repos),
        size_on_disk=sum(repo.size_on_disk for repo in scanned_repos),
        warnings=scan_warnings,
    )
699
+
700
+
701
def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
    """Scan a single cache repo and return information about it.

    Any unexpected behavior will raise a [`~CorruptedCacheException`].
    """
    if not repo_path.is_dir():
        raise CorruptedCacheException(f"Repo path is not a directory: {repo_path}")

    # Cached repo folders are named "<type>s--<org>--<name>" (e.g. "models--t5-base").
    if "--" not in repo_path.name:
        raise CorruptedCacheException(f"Repo path is not a valid HuggingFace cache directory: {repo_path}")

    repo_type, repo_id = repo_path.name.split("--", maxsplit=1)
    repo_type = repo_type[:-1]  # "models" -> "model"
    repo_id = repo_id.replace("--", "/")  # "google--fleurs" -> "google/fleurs"

    if repo_type not in {"dataset", "model", "space"}:
        raise CorruptedCacheException(
            f"Repo type must be `dataset`, `model` or `space`, found `{repo_type}` ({repo_path})."
        )

    # Blobs are shared across revisions: stat each blob only once and reuse the result.
    blob_stats: Dict[Path, os.stat_result] = {}  # Key is blob_path, value is blob stats

    snapshots_path = repo_path / "snapshots"
    refs_path = repo_path / "refs"

    if not snapshots_path.exists() or not snapshots_path.is_dir():
        raise CorruptedCacheException(f"Snapshots dir doesn't exist in cached repo: {snapshots_path}")

    # Scan over `refs` directory

    # key is revision hash, value is set of refs
    refs_by_hash: Dict[str, Set[str]] = defaultdict(set)
    if refs_path.exists():
        # Example of `refs` directory
        # ── refs
        #    ├── main
        #    └── refs
        #        └── pr
        #            └── 1
        if refs_path.is_file():
            raise CorruptedCacheException(f"Refs directory cannot be a file: {refs_path}")

        for ref_path in refs_path.glob("**/*"):
            # glob("**/*") iterates over all files and directories -> skip directories
            if ref_path.is_dir() or ref_path.name in FILES_TO_IGNORE:
                continue

            # Ref name is the path relative to `refs` (e.g. "main", "refs/pr/1").
            ref_name = str(ref_path.relative_to(refs_path))
            # Each ref file contains the commit hash it points to.
            with ref_path.open() as f:
                commit_hash = f.read()

            refs_by_hash[commit_hash].add(ref_name)

    # Scan snapshots directory
    cached_revisions: Set[CachedRevisionInfo] = set()
    for revision_path in snapshots_path.iterdir():
        # Ignore OS-created helper files
        if revision_path.name in FILES_TO_IGNORE:
            continue
        if revision_path.is_file():
            raise CorruptedCacheException(f"Snapshots folder corrupted. Found a file: {revision_path}")

        cached_files = set()
        for file_path in revision_path.glob("**/*"):
            # glob("**/*") iterates over all files and directories -> skip directories
            if file_path.is_dir():
                continue

            # Snapshot entries are symlinks: resolve to the underlying blob file.
            blob_path = Path(file_path).resolve()
            if not blob_path.exists():
                raise CorruptedCacheException(f"Blob missing (broken symlink): {blob_path}")

            if blob_path not in blob_stats:
                blob_stats[blob_path] = blob_path.stat()

            cached_files.add(
                CachedFileInfo(
                    file_name=file_path.name,
                    file_path=file_path,
                    size_on_disk=blob_stats[blob_path].st_size,
                    blob_path=blob_path,
                    blob_last_accessed=blob_stats[blob_path].st_atime,
                    blob_last_modified=blob_stats[blob_path].st_mtime,
                )
            )

        # Last modified is either the last modified blob file or the revision folder
        # itself if it is empty
        if len(cached_files) > 0:
            revision_last_modified = max(blob_stats[file.blob_path].st_mtime for file in cached_files)
        else:
            revision_last_modified = revision_path.stat().st_mtime

        cached_revisions.add(
            CachedRevisionInfo(
                commit_hash=revision_path.name,
                files=frozenset(cached_files),
                # `pop` consumes the refs: any hash left in `refs_by_hash` afterwards
                # points to a missing revision (checked below).
                refs=frozenset(refs_by_hash.pop(revision_path.name, set())),
                # Deduplicate by blob_path so shared blobs are counted once.
                size_on_disk=sum(
                    blob_stats[blob_path].st_size for blob_path in set(file.blob_path for file in cached_files)
                ),
                snapshot_path=revision_path,
                last_modified=revision_last_modified,
            )
        )

    # Check that all refs referred to an existing revision
    if len(refs_by_hash) > 0:
        raise CorruptedCacheException(
            f"Reference(s) refer to missing commit hashes: {dict(refs_by_hash)} ({repo_path})."
        )

    # Last modified is either the last modified blob file or the repo folder itself if
    # no blob files has been found. Same for last accessed.
    if len(blob_stats) > 0:
        repo_last_accessed = max(stat.st_atime for stat in blob_stats.values())
        repo_last_modified = max(stat.st_mtime for stat in blob_stats.values())
    else:
        repo_stats = repo_path.stat()
        repo_last_accessed = repo_stats.st_atime
        repo_last_modified = repo_stats.st_mtime

    # Build and return frozen structure
    return CachedRepoInfo(
        nb_files=len(blob_stats),
        repo_id=repo_id,
        repo_path=repo_path,
        repo_type=repo_type,  # type: ignore
        revisions=frozenset(cached_revisions),
        size_on_disk=sum(stat.st_size for stat in blob_stats.values()),
        last_accessed=repo_last_accessed,
        last_modified=repo_last_modified,
    )
834
+
835
+
836
+ def _format_size(num: int) -> str:
837
+ """Format size in bytes into a human-readable string.
838
+
839
+ Taken from https://stackoverflow.com/a/1094933
840
+ """
841
+ num_f = float(num)
842
+ for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
843
+ if abs(num_f) < 1000.0:
844
+ return f"{num_f:3.1f}{unit}"
845
+ num_f /= 1000.0
846
+ return f"{num_f:.1f}Y"
847
+
848
+
849
+ _TIMESINCE_CHUNKS = (
850
+ # Label, divider, max value
851
+ ("second", 1, 60),
852
+ ("minute", 60, 60),
853
+ ("hour", 60 * 60, 24),
854
+ ("day", 60 * 60 * 24, 6),
855
+ ("week", 60 * 60 * 24 * 7, 6),
856
+ ("month", 60 * 60 * 24 * 30, 11),
857
+ ("year", 60 * 60 * 24 * 365, None),
858
+ )
859
+
860
+
861
+ def _format_timesince(ts: float) -> str:
862
+ """Format timestamp in seconds into a human-readable string, relative to now.
863
+
864
+ Vaguely inspired by Django's `timesince` formatter.
865
+ """
866
+ delta = time.time() - ts
867
+ if delta < 20:
868
+ return "a few seconds ago"
869
+ for label, divider, max_value in _TIMESINCE_CHUNKS: # noqa: B007
870
+ value = round(delta / divider)
871
+ if max_value is not None and value <= max_value:
872
+ break
873
+ return f"{value} {label}{'s' if value > 1 else ''} ago"
874
+
875
+
876
def _try_delete_path(path: Path, path_type: str) -> None:
    """Try to delete a local file or folder.

    If the path does not exists, error is logged as a warning and then ignored.

    Args:
        path (`Path`)
            Path to delete. Can be a file or a folder.
        path_type (`str`)
            What path are we deleting ? Only for logging purposes. Example: "snapshot".
    """
    logger.info(f"Delete {path_type}: {path}")
    try:
        if path.is_file():
            # Single file (e.g. a blob or a ref) -> plain unlink.
            path.unlink()
        else:
            # Directory (e.g. a snapshot or a whole repo) -> recursive removal.
            shutil.rmtree(path)
    except FileNotFoundError:
        logger.warning(f"Couldn't delete {path_type}: file not found ({path})", exc_info=True)
    except PermissionError:
        logger.warning(f"Couldn't delete {path_type}: permission denied ({path})", exc_info=True)
venv/lib/python3.12/site-packages/huggingface_hub/utils/_datetime.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to handle datetimes in Huggingface Hub."""
16
+
17
+ from datetime import datetime, timezone
18
+
19
+
20
def parse_datetime(date_string: str) -> datetime:
    """
    Parses a date_string returned from the server to a timezone-aware datetime object.

    This parser is a weak-parser is the sense that it handles only a single format of
    date_string. It is expected that the server format will never change. The
    implementation depends only on the standard lib to avoid an external dependency
    (python-dateutil). See full discussion about this decision on PR:
    https://github.com/huggingface/huggingface_hub/pull/999.

    Example:
    ```py
    > parse_datetime('2022-08-19T07:19:38.123Z')
    datetime.datetime(2022, 8, 19, 7, 19, 38, 123000, tzinfo=timezone.utc)
    ```

    Args:
        date_string (`str`):
            A string representing a datetime returned by the Hub server.
            String is expected to follow '%Y-%m-%dT%H:%M:%S.%fZ' pattern.

    Returns:
        A python datetime object.

    Raises:
        :class:`ValueError`:
            If `date_string` cannot be parsed.
    """
    try:
        # Normalize the string so it always carries exactly 6 fractional digits,
        # matching the '%f' directive of strptime.
        if date_string.endswith("Z"):
            without_suffix = date_string[:-1]
            if "." not in date_string:
                # No fractional seconds (e.g. "2024-11-16T00:27:02Z") -> insert .000000
                date_string = without_suffix + ".000000Z"
            else:
                # Fractional seconds present (e.g. "2022-08-19T07:19:38.123456789Z"):
                # truncate to 6 digits and right-pad with zeros if shorter.
                base, fraction = without_suffix.split(".")
                date_string = f"{base}.{fraction[:6]:0<6}Z"

        naive = datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ")
        return naive.replace(tzinfo=timezone.utc)
    except ValueError as e:
        raise ValueError(
            f"Cannot parse '{date_string}' as a datetime. Date string is expected to"
            " follow '%Y-%m-%dT%H:%M:%S.%fZ' pattern."
        ) from e
venv/lib/python3.12/site-packages/huggingface_hub/utils/_deprecation.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ from functools import wraps
3
+ from inspect import Parameter, signature
4
+ from typing import Iterable, Optional
5
+
6
+
7
+ def _deprecate_positional_args(*, version: str):
8
+ """Decorator for methods that issues warnings for positional arguments.
9
+ Using the keyword-only argument syntax in pep 3102, arguments after the
10
+ * will issue a warning when passed as a positional argument.
11
+
12
+ Args:
13
+ version (`str`):
14
+ The version when positional arguments will result in error.
15
+ """
16
+
17
+ def _inner_deprecate_positional_args(f):
18
+ sig = signature(f)
19
+ kwonly_args = []
20
+ all_args = []
21
+ for name, param in sig.parameters.items():
22
+ if param.kind == Parameter.POSITIONAL_OR_KEYWORD:
23
+ all_args.append(name)
24
+ elif param.kind == Parameter.KEYWORD_ONLY:
25
+ kwonly_args.append(name)
26
+
27
+ @wraps(f)
28
+ def inner_f(*args, **kwargs):
29
+ extra_args = len(args) - len(all_args)
30
+ if extra_args <= 0:
31
+ return f(*args, **kwargs)
32
+ # extra_args > 0
33
+ args_msg = [
34
+ f"{name}='{arg}'" if isinstance(arg, str) else f"{name}={arg}"
35
+ for name, arg in zip(kwonly_args[:extra_args], args[-extra_args:])
36
+ ]
37
+ args_msg = ", ".join(args_msg)
38
+ warnings.warn(
39
+ f"Deprecated positional argument(s) used in '{f.__name__}': pass"
40
+ f" {args_msg} as keyword args. From version {version} passing these"
41
+ " as positional arguments will result in an error,",
42
+ FutureWarning,
43
+ )
44
+ kwargs.update(zip(sig.parameters, args))
45
+ return f(**kwargs)
46
+
47
+ return inner_f
48
+
49
+ return _inner_deprecate_positional_args
50
+
51
+
52
+ def _deprecate_arguments(
53
+ *,
54
+ version: str,
55
+ deprecated_args: Iterable[str],
56
+ custom_message: Optional[str] = None,
57
+ ):
58
+ """Decorator to issue warnings when using deprecated arguments.
59
+
60
+ TODO: could be useful to be able to set a custom error message.
61
+
62
+ Args:
63
+ version (`str`):
64
+ The version when deprecated arguments will result in error.
65
+ deprecated_args (`List[str]`):
66
+ List of the arguments to be deprecated.
67
+ custom_message (`str`, *optional*):
68
+ Warning message that is raised. If not passed, a default warning message
69
+ will be created.
70
+ """
71
+
72
+ def _inner_deprecate_positional_args(f):
73
+ sig = signature(f)
74
+
75
+ @wraps(f)
76
+ def inner_f(*args, **kwargs):
77
+ # Check for used deprecated arguments
78
+ used_deprecated_args = []
79
+ for _, parameter in zip(args, sig.parameters.values()):
80
+ if parameter.name in deprecated_args:
81
+ used_deprecated_args.append(parameter.name)
82
+ for kwarg_name, kwarg_value in kwargs.items():
83
+ if (
84
+ # If argument is deprecated but still used
85
+ kwarg_name in deprecated_args
86
+ # And then the value is not the default value
87
+ and kwarg_value != sig.parameters[kwarg_name].default
88
+ ):
89
+ used_deprecated_args.append(kwarg_name)
90
+
91
+ # Warn and proceed
92
+ if len(used_deprecated_args) > 0:
93
+ message = (
94
+ f"Deprecated argument(s) used in '{f.__name__}':"
95
+ f" {', '.join(used_deprecated_args)}. Will not be supported from"
96
+ f" version '{version}'."
97
+ )
98
+ if custom_message is not None:
99
+ message += "\n\n" + custom_message
100
+ warnings.warn(message, FutureWarning)
101
+ return f(*args, **kwargs)
102
+
103
+ return inner_f
104
+
105
+ return _inner_deprecate_positional_args
106
+
107
+
108
+ def _deprecate_method(*, version: str, message: Optional[str] = None):
109
+ """Decorator to issue warnings when using a deprecated method.
110
+
111
+ Args:
112
+ version (`str`):
113
+ The version when deprecated arguments will result in error.
114
+ message (`str`, *optional*):
115
+ Warning message that is raised. If not passed, a default warning message
116
+ will be created.
117
+ """
118
+
119
+ def _inner_deprecate_method(f):
120
+ name = f.__name__
121
+ if name == "__init__":
122
+ name = f.__qualname__.split(".")[0] # class name instead of method name
123
+
124
+ @wraps(f)
125
+ def inner_f(*args, **kwargs):
126
+ warning_message = (
127
+ f"'{name}' (from '{f.__module__}') is deprecated and will be removed from version '{version}'."
128
+ )
129
+ if message is not None:
130
+ warning_message += " " + message
131
+ warnings.warn(warning_message, FutureWarning)
132
+ return f(*args, **kwargs)
133
+
134
+ return inner_f
135
+
136
+ return _inner_deprecate_method
venv/lib/python3.12/site-packages/huggingface_hub/utils/_experimental.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2023-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to flag a feature as "experimental" in Huggingface Hub."""
16
+
17
+ import warnings
18
+ from functools import wraps
19
+ from typing import Callable
20
+
21
+ from .. import constants
22
+
23
+
24
def experimental(fn: Callable) -> Callable:
    """Decorator flagging a feature as experimental.

    An experimental feature may receive breaking changes without prior notice; a
    `UserWarning` is emitted on every call unless the
    `HF_HUB_DISABLE_EXPERIMENTAL_WARNING` environment variable is set.

    Args:
        fn (`Callable`):
            The function to flag as experimental.

    Returns:
        `Callable`: The decorated function.

    Example:

    ```python
    >>> from huggingface_hub.utils import experimental

    >>> @experimental
    ... def my_function():
    ...     print("Hello world!")

    >>> my_function()
    UserWarning: 'my_function' is experimental and might be subject to breaking changes in the future without prior
    notice. You can disable this warning by setting `HF_HUB_DISABLE_EXPERIMENTAL_WARNING=1` as environment variable.
    Hello world!
    ```
    """
    # When a class is decorated through its `__new__`, warn about the class name
    # instead of `__new__`.
    qualname = fn.__qualname__
    name = qualname[: -len(".__new__")] if qualname.endswith(".__new__") else qualname

    @wraps(fn)
    def _inner_fn(*args, **kwargs):
        if not constants.HF_HUB_DISABLE_EXPERIMENTAL_WARNING:
            warnings.warn(
                f"'{name}' is experimental and might be subject to breaking changes in the future without prior notice."
                " You can disable this warning by setting `HF_HUB_DISABLE_EXPERIMENTAL_WARNING=1` as environment"
                " variable.",
                UserWarning,
            )
        return fn(*args, **kwargs)

    return _inner_fn
venv/lib/python3.12/site-packages/huggingface_hub/utils/_fixes.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # JSONDecodeError was introduced in requests=2.27 released in 2022.
2
+ # This allows us to support older requests for users
3
+ # More information: https://github.com/psf/requests/pull/5856
4
+ try:
5
+ from requests import JSONDecodeError # type: ignore # noqa: F401
6
+ except ImportError:
7
+ try:
8
+ from simplejson import JSONDecodeError # type: ignore # noqa: F401
9
+ except ImportError:
10
+ from json import JSONDecodeError # type: ignore # noqa: F401
11
+ import contextlib
12
+ import os
13
+ import shutil
14
+ import stat
15
+ import tempfile
16
+ import time
17
+ from functools import partial
18
+ from pathlib import Path
19
+ from typing import Callable, Generator, Optional, Union
20
+
21
+ import yaml
22
+ from filelock import BaseFileLock, FileLock, SoftFileLock, Timeout
23
+
24
+ from .. import constants
25
+ from . import logging
26
+
27
+
28
+ logger = logging.get_logger(__name__)
29
+
30
# Wrap `yaml.dump` to set `allow_unicode=True` by default.
#
# `stream=None` makes `yaml.dump` return the produced document as a `str`
# instead of writing it to a stream.
#
# Example:
# ```py
# >>> yaml.dump({"emoji": "👀", "some unicode": "日本か"})
# 'emoji: "\\U0001F440"\nsome unicode: "\\u65E5\\u672C\\u304B"\n'
#
# >>> yaml_dump({"emoji": "👀", "some unicode": "日本か"})
# 'emoji: "👀"\nsome unicode: "日本か"\n'
# ```
yaml_dump: Callable[..., str] = partial(yaml.dump, stream=None, allow_unicode=True)  # type: ignore
41
+
42
+
43
@contextlib.contextmanager
def SoftTemporaryDirectory(
    suffix: Optional[str] = None,
    prefix: Optional[str] = None,
    dir: Optional[Union[Path, str]] = None,
    **kwargs,
) -> Generator[Path, None, None]:
    """
    Context manager to create a temporary directory and safely delete it.

    If tmp directory cannot be deleted normally, we set the WRITE permission and retry.
    If cleanup still fails, we give up but don't raise an exception. This is equivalent
    to `tempfile.TemporaryDirectory(..., ignore_cleanup_errors=True)` introduced in
    Python 3.10.

    See https://www.scivision.dev/python-tempfile-permission-error-windows/.

    Args:
        suffix (`str`, *optional*): Suffix for the directory name.
        prefix (`str`, *optional*): Prefix for the directory name.
        dir (`Path` or `str`, *optional*): Parent directory in which to create it.
        **kwargs: Forwarded to `tempfile.TemporaryDirectory`.

    Yields:
        `Path`: Resolved path to the temporary directory.
    """
    tmpdir = tempfile.TemporaryDirectory(prefix=prefix, suffix=suffix, dir=dir, **kwargs)
    try:
        yield Path(tmpdir.name).resolve()
    finally:
        # Fix: run cleanup even when the `with` body raises. Previously the cleanup
        # code was placed after a bare `yield`, so an exception raised in the body
        # skipped it entirely.
        try:
            # First try a normal cleanup.
            shutil.rmtree(tmpdir.name)
        except Exception:
            # If it failed (e.g. read-only files on Windows), set write permission
            # and retry.
            try:
                shutil.rmtree(tmpdir.name, onerror=_set_write_permission_and_retry)
            except Exception:
                pass

        # And finally, let `TemporaryDirectory` finalize itself.
        # If it fails again, give up but do not throw an error.
        try:
            tmpdir.cleanup()
        except Exception:
            pass
79
+
80
+
81
def _set_write_permission_and_retry(func, path, excinfo):
    """`shutil.rmtree` onerror-callback: grant write permission on `path`, then retry the failed call.

    Needed mainly on Windows, where read-only files cannot be removed directly.
    """
    os.chmod(path, stat.S_IWRITE)
    func(path)
84
+
85
+
86
@contextlib.contextmanager
def WeakFileLock(
    lock_file: Union[str, Path], *, timeout: Optional[float] = None
) -> Generator[BaseFileLock, None, None]:
    """A filelock with some custom logic.

    This filelock is weaker than the default filelock in that:
    1. It won't raise an exception if release fails.
    2. It will default to a SoftFileLock if the filesystem does not support flock.

    An INFO log message is emitted every 10 seconds if the lock is not acquired immediately.
    If a timeout is provided, a `filelock.Timeout` exception is raised if the lock is not acquired within the timeout.

    Args:
        lock_file (`str` or `Path`):
            Path of the lock file.
        timeout (`float`, *optional*):
            Maximum number of seconds to wait for the lock. `None` (default) waits forever.

    Yields:
        `BaseFileLock`: the acquired lock (a `FileLock`, or a `SoftFileLock` when flock is unsupported).
    """
    # Acquire in short slices so we can log progress periodically and enforce the
    # overall timeout ourselves.
    log_interval = constants.FILELOCK_LOG_EVERY_SECONDS
    lock = FileLock(lock_file, timeout=log_interval)
    start_time = time.time()

    while True:
        elapsed_time = time.time() - start_time
        if timeout is not None and elapsed_time >= timeout:
            # Overall deadline exceeded => surface a `filelock.Timeout` to the caller.
            raise Timeout(str(lock_file))

        try:
            # Wait at most one log interval, capped by the remaining time when a timeout is set.
            lock.acquire(timeout=min(log_interval, timeout - elapsed_time) if timeout else log_interval)
        except Timeout:
            # Not acquired within this slice: log and loop to try again.
            logger.info(
                f"Still waiting to acquire lock on {lock_file} (elapsed: {time.time() - start_time:.1f} seconds)"
            )
        except NotImplementedError as e:
            # `filelock` raises this when the filesystem does not support flock.
            # NOTE(review): a NotImplementedError without this marker is silently
            # swallowed and the loop retries — confirm this is intended (risk of a
            # busy loop if the error is persistent).
            if "use SoftFileLock instead" in str(e):
                logger.warning(
                    "FileSystem does not appear to support flock. Falling back to SoftFileLock for %s", lock_file
                )
                lock = SoftFileLock(lock_file, timeout=log_interval)
                continue
        else:
            break

    try:
        yield lock
    finally:
        # Best-effort release: never raise from cleanup (point 1 of the docstring).
        try:
            lock.release()
        except OSError:
            # Release failed: fall back to removing the lock file itself, also best-effort.
            try:
                Path(lock_file).unlink()
            except OSError:
                pass
venv/lib/python3.12/site-packages/huggingface_hub/utils/_http.py ADDED
@@ -0,0 +1,637 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to handle HTTP requests in Huggingface Hub."""
16
+
17
+ import io
18
+ import os
19
+ import re
20
+ import threading
21
+ import time
22
+ import uuid
23
+ from functools import lru_cache
24
+ from http import HTTPStatus
25
+ from shlex import quote
26
+ from typing import Any, Callable, List, Optional, Tuple, Type, Union
27
+
28
+ import requests
29
+ from requests import HTTPError, Response
30
+ from requests.adapters import HTTPAdapter
31
+ from requests.models import PreparedRequest
32
+
33
+ from huggingface_hub.errors import OfflineModeIsEnabled
34
+
35
+ from .. import constants
36
+ from ..errors import (
37
+ BadRequestError,
38
+ DisabledRepoError,
39
+ EntryNotFoundError,
40
+ GatedRepoError,
41
+ HfHubHTTPError,
42
+ RepositoryNotFoundError,
43
+ RevisionNotFoundError,
44
+ )
45
+ from . import logging
46
+ from ._fixes import JSONDecodeError
47
+ from ._lfs import SliceFileObj
48
+ from ._typing import HTTP_METHOD_T
49
+
50
+
51
logger = logging.get_logger(__name__)

# Both headers are used by the Hub to debug failed requests.
# `X_AMZN_TRACE_ID` is better as it also works to debug on Cloudfront and ALB.
# If `X_AMZN_TRACE_ID` is set, the Hub will use it as well.
X_AMZN_TRACE_ID = "X-Amzn-Trace-Id"
X_REQUEST_ID = "x-request-id"

# Matches Hub URLs that target a specific repo (API calls or file resolution).
# Used by `hf_raise_for_status` to decide whether a 401 response should be
# reported as a `RepositoryNotFoundError`.
REPO_API_REGEX = re.compile(
    r"""
    # staging or production endpoint
    ^https://[^/]+
    (
        # on /api/repo_type/repo_id
        /api/(models|datasets|spaces)/(.+)
        |
        # or /repo_id/resolve/revision/...
        /(.+)/resolve/(.+)
    )
    """,
    flags=re.VERBOSE,
)
73
+
74
+
75
class UniqueRequestIdAdapter(HTTPAdapter):
    """HTTP adapter that stamps every outgoing request with a unique trace id.

    The id is sent in the `X-Amzn-Trace-Id` header so failed requests can be
    correlated with server-side logs.
    """

    # NOTE(review): this class attribute shadows the module-level constant of the
    # same name; the methods below read the module-level one.
    X_AMZN_TRACE_ID = "X-Amzn-Trace-Id"

    def add_headers(self, request, **kwargs):
        super().add_headers(request, **kwargs)

        # Add random request ID => easier for server-side debug
        # Reuse the caller-provided `x-request-id` header when present, otherwise
        # generate a fresh UUID.
        if X_AMZN_TRACE_ID not in request.headers:
            request.headers[X_AMZN_TRACE_ID] = request.headers.get(X_REQUEST_ID) or str(uuid.uuid4())

        # Add debug log
        has_token = len(str(request.headers.get("authorization", ""))) > 0
        logger.debug(
            f"Request {request.headers[X_AMZN_TRACE_ID]}: {request.method} {request.url} (authenticated: {has_token})"
        )

    def send(self, request: PreparedRequest, *args, **kwargs) -> Response:
        """Catch any RequestException to append request id to the error message for debugging."""
        if constants.HF_DEBUG:
            logger.debug(f"Send: {_curlify(request)}")
        try:
            return super().send(request, *args, **kwargs)
        except requests.RequestException as e:
            request_id = request.headers.get(X_AMZN_TRACE_ID)
            if request_id is not None:
                # Taken from https://stackoverflow.com/a/58270258
                e.args = (*e.args, f"(Request ID: {request_id})")
            raise
103
+
104
+
105
class OfflineAdapter(HTTPAdapter):
    """Transport adapter mounted when `HF_HUB_OFFLINE` is set: every request fails immediately."""

    def send(self, request: PreparedRequest, *args, **kwargs) -> Response:
        # Fail fast instead of attempting any network access.
        raise OfflineModeIsEnabled(
            f"Cannot reach {request.url}: offline mode is enabled. To disable it, please unset the `HF_HUB_OFFLINE` environment variable."
        )
110
+
111
+
112
def _default_backend_factory() -> requests.Session:
    """Build the default `requests.Session`, honoring `HF_HUB_OFFLINE`."""
    session = requests.Session()
    # Mount one adapter instance per scheme: refuse all traffic when offline,
    # otherwise tag each request with a unique id for server-side debugging.
    adapter_cls = OfflineAdapter if constants.HF_HUB_OFFLINE else UniqueRequestIdAdapter
    for scheme in ("http://", "https://"):
        session.mount(scheme, adapter_cls())
    return session
121
+
122
+
123
# Type of a factory returning a fresh `requests.Session`.
BACKEND_FACTORY_T = Callable[[], requests.Session]
# Currently configured factory; overridable via `configure_http_backend`.
_GLOBAL_BACKEND_FACTORY: BACKEND_FACTORY_T = _default_backend_factory
125
+
126
+
127
def configure_http_backend(backend_factory: BACKEND_FACTORY_T = _default_backend_factory) -> None:
    """Register the factory used to build `requests.Session` objects for all Hub HTTP calls.

    Useful when running in an environment that requires custom configuration
    (e.g. a custom proxy or certificates). One session is created per thread (see
    [`get_session`]) and all of them are instantiated by this factory; an LRU cache
    (max size 128, to avoid memory leaks with thousands of threads) keeps the
    created sessions/connections alive between calls. Per-thread sessions are used
    because `requests.Session` is not guaranteed to be thread-safe
    (https://github.com/psf/requests/issues/2766).

    Example:
    ```py
    import requests
    from huggingface_hub import configure_http_backend, get_session

    # Create a factory function that returns a Session with configured proxies
    def backend_factory() -> requests.Session:
        session = requests.Session()
        session.proxies = {"http": "http://10.10.1.10:3128", "https": "https://10.10.1.11:1080"}
        return session

    # Set it as the default session factory
    configure_http_backend(backend_factory=backend_factory)

    # In practice, this is mostly done internally in `huggingface_hub`
    session = get_session()
    ```
    """
    global _GLOBAL_BACKEND_FACTORY
    _GLOBAL_BACKEND_FACTORY = backend_factory
    # Invalidate previously created sessions so they get rebuilt with the new factory.
    reset_sessions()
161
+
162
+
163
def get_session() -> requests.Session:
    """Return the `requests.Session` for the current thread, built by the configured factory.

    Sessions are created per (process, thread) pair and memoized in an LRU cache
    (max size 128, to avoid memory leaks with thousands of threads) because
    `requests.Session` is not guaranteed to be thread-safe
    (https://github.com/psf/requests/issues/2766). Use [`configure_http_backend`]
    to customize how sessions are built.

    Example:
    ```py
    import requests
    from huggingface_hub import configure_http_backend, get_session

    # Create a factory function that returns a Session with configured proxies
    def backend_factory() -> requests.Session:
        session = requests.Session()
        session.proxies = {"http": "http://10.10.1.10:3128", "https": "https://10.10.1.11:1080"}
        return session

    # Set it as the default session factory
    configure_http_backend(backend_factory=backend_factory)

    # In practice, this is mostly done internally in `huggingface_hub`
    session = get_session()
    ```
    """
    # Keyed on (pid, thread id): a forked process or a new thread each get their own session.
    return _get_session_from_cache(process_id=os.getpid(), thread_id=threading.get_ident())
193
+
194
+
195
def reset_sessions() -> None:
    """Flush all cached `requests.Session` objects.

    Called when the backend factory changes (see [`configure_http_backend`]) or
    after an SSLError, so that subsequent calls build fresh sessions.
    """
    _get_session_from_cache.cache_clear()
202
+
203
+
204
@lru_cache
def _get_session_from_cache(process_id: int, thread_id: int) -> requests.Session:
    """Build (and memoize) one session per (process, thread) pair via the global factory.

    The LRU cache (default maxsize 128) bounds memory when thousands of threads
    are spawned; it is cleared by `configure_http_backend` / `reset_sessions`.
    """
    return _GLOBAL_BACKEND_FACTORY()
211
+
212
+
213
def http_backoff(
    method: HTTP_METHOD_T,
    url: str,
    *,
    max_retries: int = 5,
    base_wait_time: float = 1,
    max_wait_time: float = 8,
    retry_on_exceptions: Union[Type[Exception], Tuple[Type[Exception], ...]] = (
        requests.Timeout,
        requests.ConnectionError,
    ),
    retry_on_status_codes: Union[int, Tuple[int, ...]] = HTTPStatus.SERVICE_UNAVAILABLE,
    **kwargs,
) -> Response:
    """Wrapper around requests to retry calls on an endpoint, with exponential backoff.

    Endpoint call is retried on exceptions (ex: connection timeout, proxy error,...)
    and/or on specific status codes (ex: service unavailable). If the call failed more
    than `max_retries`, the exception is thrown or `raise_for_status` is called on the
    response object.

    Re-implement mechanisms from the `backoff` library to avoid adding an external
    dependencies to `hugging_face_hub`. See https://github.com/litl/backoff.

    Args:
        method (`Literal["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"]`):
            HTTP method to perform.
        url (`str`):
            The URL of the resource to fetch.
        max_retries (`int`, *optional*, defaults to `5`):
            Maximum number of retries.
        base_wait_time (`float`, *optional*, defaults to `1`):
            Duration (in seconds) to wait before retrying the first time.
            Wait time between retries then grows exponentially, capped by
            `max_wait_time`.
        max_wait_time (`float`, *optional*, defaults to `8`):
            Maximum duration (in seconds) to wait before retrying.
        retry_on_exceptions (`Type[Exception]` or `Tuple[Type[Exception]]`, *optional*):
            Define which exceptions must be caught to retry the request. Can be a single type or a tuple of types.
            By default, retry on `requests.Timeout` and `requests.ConnectionError`.
        retry_on_status_codes (`int` or `Tuple[int]`, *optional*, defaults to `503`):
            Define on which status codes the request must be retried. By default, only
            HTTP 503 Service Unavailable is retried.
        **kwargs (`dict`, *optional*):
            kwargs to pass to `requests.request`.

    Example:
    ```
    >>> from huggingface_hub.utils import http_backoff

    # Same usage as "requests.request".
    >>> response = http_backoff("GET", "https://www.google.com")
    >>> response.raise_for_status()

    # If you expect a Gateway Timeout from time to time
    >>> http_backoff("PUT", upload_url, data=data, retry_on_status_codes=504)
    >>> response.raise_for_status()
    ```

    <Tip warning={true}>

    When using `requests` it is possible to stream data by passing an iterator to the
    `data` argument. On http backoff this is a problem as the iterator is not reset
    after a failed call. This issue is mitigated for file objects or any IO streams
    by saving the initial position of the cursor (with `data.tell()`) and resetting the
    cursor between each call (with `data.seek()`). For arbitrary iterators, http backoff
    will fail. If this is a hard constraint for you, please let us know by opening an
    issue on [Github](https://github.com/huggingface/huggingface_hub).

    </Tip>
    """
    if isinstance(retry_on_exceptions, type):  # Tuple from single exception type
        retry_on_exceptions = (retry_on_exceptions,)

    if isinstance(retry_on_status_codes, int):  # Tuple from single status code
        retry_on_status_codes = (retry_on_status_codes,)

    nb_tries = 0
    sleep_time = base_wait_time

    # If `data` is used and is a file object (or any IO), it will be consumed on the
    # first HTTP request. We need to save the initial position so that the full content
    # of the file is re-sent on http backoff. See warning tip in docstring.
    io_obj_initial_pos = None
    if "data" in kwargs and isinstance(kwargs["data"], (io.IOBase, SliceFileObj)):
        io_obj_initial_pos = kwargs["data"].tell()

    session = get_session()
    while True:
        nb_tries += 1
        try:
            # If `data` is used and is a file object (or any IO), set back cursor to
            # initial position.
            if io_obj_initial_pos is not None:
                kwargs["data"].seek(io_obj_initial_pos)

            # Perform request and return if status_code is not in the retry list.
            response = session.request(method=method, url=url, **kwargs)
            if response.status_code not in retry_on_status_codes:
                return response

            # Wrong status code returned (HTTP 503 for instance)
            logger.warning(f"HTTP Error {response.status_code} thrown while requesting {method} {url}")
            if nb_tries > max_retries:
                response.raise_for_status()  # Will raise uncaught exception
                # We return response to avoid infinite loop in the corner case where the
                # user ask for retry on a status code that doesn't raise_for_status.
                return response

        except retry_on_exceptions as err:
            logger.warning(f"'{err}' thrown while requesting {method} {url}")

            if isinstance(err, requests.ConnectionError):
                reset_sessions()  # In case of SSLError it's best to reset the shared requests.Session objects

            if nb_tries > max_retries:
                raise err

        # Sleep for X seconds
        logger.warning(f"Retrying in {sleep_time}s [Retry {nb_tries}/{max_retries}].")
        time.sleep(sleep_time)

        # Update sleep time for next retry
        sleep_time = min(max_wait_time, sleep_time * 2)  # Exponential backoff
337
+
338
+
339
def fix_hf_endpoint_in_url(url: str, endpoint: Optional[str]) -> str:
    """Replace the default Hub endpoint in a URL by a custom one.

    This is useful when using a proxy and the Hugging Face Hub returns a URL with the default endpoint.
    """
    target = endpoint.rstrip("/") if endpoint else constants.ENDPOINT
    # Only rewrite when a non-default endpoint (i.e. a proxy) is configured.
    if target not in (constants._HF_DEFAULT_ENDPOINT, constants._HF_DEFAULT_STAGING_ENDPOINT):
        for default_endpoint in (constants._HF_DEFAULT_ENDPOINT, constants._HF_DEFAULT_STAGING_ENDPOINT):
            url = url.replace(default_endpoint, target)
    return url
350
+
351
+
352
def hf_raise_for_status(response: Response, endpoint_name: Optional[str] = None) -> None:
    """
    Internal version of `response.raise_for_status()` that will refine a
    potential HTTPError. Raised exception will be an instance of `HfHubHTTPError`.

    This helper is meant to be the unique method to raise_for_status when making a call
    to the Hugging Face Hub.


    Example:
    ```py
    import requests
    from huggingface_hub.utils import get_session, hf_raise_for_status, HfHubHTTPError

    response = get_session().post(...)
    try:
        hf_raise_for_status(response)
    except HfHubHTTPError as e:
        print(str(e)) # formatted message
        e.request_id, e.server_message # details returned by server

        # Complete the error message with additional information once it's raised
        e.append_to_message("\n`create_commit` expects the repository to exist.")
        raise
    ```

    Args:
        response (`Response`):
            Response from the server.
        endpoint_name (`str`, *optional*):
            Name of the endpoint that has been called. If provided, the error message
            will be more complete.

    <Tip warning={true}>

    Raises when the request has failed:

        - [`~utils.RepositoryNotFoundError`]
            If the repository to download from cannot be found. This may be because it
            doesn't exist, because `repo_type` is not set correctly, or because the repo
            is `private` and you do not have access.
        - [`~utils.GatedRepoError`]
            If the repository exists but is gated and the user is not on the authorized
            list.
        - [`~utils.RevisionNotFoundError`]
            If the repository exists but the revision couldn't be find.
        - [`~utils.EntryNotFoundError`]
            If the repository exists but the entry (e.g. the requested file) couldn't be
            find.
        - [`~utils.BadRequestError`]
            If request failed with a HTTP 400 BadRequest error.
        - [`~utils.HfHubHTTPError`]
            If request failed for a reason not listed above.

    </Tip>
    """
    try:
        response.raise_for_status()
    except HTTPError as e:
        # The Hub reports machine-readable error details in these headers; they drive
        # the branch selection below (header checks first, then status-code checks).
        error_code = response.headers.get("X-Error-Code")
        error_message = response.headers.get("X-Error-Message")

        if error_code == "RevisionNotFound":
            message = f"{response.status_code} Client Error." + "\n\n" + f"Revision Not Found for url: {response.url}."
            raise _format(RevisionNotFoundError, message, response) from e

        elif error_code == "EntryNotFound":
            message = f"{response.status_code} Client Error." + "\n\n" + f"Entry Not Found for url: {response.url}."
            raise _format(EntryNotFoundError, message, response) from e

        elif error_code == "GatedRepo":
            message = (
                f"{response.status_code} Client Error." + "\n\n" + f"Cannot access gated repo for url {response.url}."
            )
            raise _format(GatedRepoError, message, response) from e

        elif error_message == "Access to this resource is disabled.":
            message = (
                f"{response.status_code} Client Error."
                + "\n\n"
                + f"Cannot access repository for url {response.url}."
                + "\n"
                + "Access to this resource is disabled."
            )
            raise _format(DisabledRepoError, message, response) from e

        elif error_code == "RepoNotFound" or (
            response.status_code == 401
            and error_message != "Invalid credentials in Authorization header"
            and response.request is not None
            and response.request.url is not None
            and REPO_API_REGEX.search(response.request.url) is not None
        ):
            # 401 is misleading as it is returned for:
            #    - private and gated repos if user is not authenticated
            #    - missing repos
            # => for now, we process them as `RepoNotFound` anyway.
            # See https://gist.github.com/Wauplin/46c27ad266b15998ce56a6603796f0b9
            message = (
                f"{response.status_code} Client Error."
                + "\n\n"
                + f"Repository Not Found for url: {response.url}."
                + "\nPlease make sure you specified the correct `repo_id` and"
                " `repo_type`.\nIf you are trying to access a private or gated repo,"
                " make sure you are authenticated. For more details, see"
                " https://huggingface.co/docs/huggingface_hub/authentication"
            )
            raise _format(RepositoryNotFoundError, message, response) from e

        elif response.status_code == 400:
            message = (
                f"\n\nBad request for {endpoint_name} endpoint:" if endpoint_name is not None else "\n\nBad request:"
            )
            raise _format(BadRequestError, message, response) from e

        elif response.status_code == 403:
            message = (
                f"\n\n{response.status_code} Forbidden: {error_message}."
                + f"\nCannot access content at: {response.url}."
                + "\nMake sure your token has the correct permissions."
            )
            raise _format(HfHubHTTPError, message, response) from e

        elif response.status_code == 416:
            # 416 Range Not Satisfiable: surface both the requested range and the
            # server-reported Content-Range to ease debugging of resumed downloads.
            range_header = response.request.headers.get("Range")
            message = f"{e}. Requested range: {range_header}. Content-Range: {response.headers.get('Content-Range')}."
            raise _format(HfHubHTTPError, message, response) from e

        # Convert `HTTPError` into a `HfHubHTTPError` to display request information
        # as well (request id and/or server error message)
        raise _format(HfHubHTTPError, str(e), response) from e
483
+
484
+
485
def _format(error_type: Type[HfHubHTTPError], custom_message: str, response: Response) -> HfHubHTTPError:
    """Build an `error_type` exception enriched with details extracted from `response`.

    Collects server-side error messages (from the `X-Error-Message` header and from
    the JSON or plain-text body), deduplicates them, appends them to `custom_message`,
    and inserts the request id (`x-request-id`, with `X-Amzn-Trace-Id` as fallback)
    so the error can be correlated with server logs.
    """
    server_errors = []

    # Retrieve server error from header
    from_headers = response.headers.get("X-Error-Message")
    if from_headers is not None:
        server_errors.append(from_headers)

    # Retrieve server error from body
    try:
        # Case errors are returned in a JSON format
        data = response.json()

        error = data.get("error")
        if error is not None:
            if isinstance(error, list):
                # Case {'error': ['my error 1', 'my error 2']}
                server_errors.extend(error)
            else:
                # Case {'error': 'my error'}
                server_errors.append(error)

        errors = data.get("errors")
        if errors is not None:
            # Case {'errors': [{'message': 'my error 1'}, {'message': 'my error 2'}]}
            for error in errors:
                if "message" in error:
                    server_errors.append(error["message"])

    except JSONDecodeError:
        # If content is not JSON and not HTML, append the text
        content_type = response.headers.get("Content-Type", "")
        if response.text and "html" not in content_type.lower():
            server_errors.append(response.text)

    # Strip all server messages
    server_errors = [str(line).strip() for line in server_errors if str(line).strip()]

    # Deduplicate server messages (keep order)
    # taken from https://stackoverflow.com/a/17016257
    server_errors = list(dict.fromkeys(server_errors))

    # Format server error
    server_message = "\n".join(server_errors)

    # Add server error to custom message, unless it is already contained in it.
    final_error_message = custom_message
    if server_message and server_message.lower() not in custom_message.lower():
        if "\n\n" in custom_message:
            final_error_message += "\n" + server_message
        else:
            final_error_message += "\n\n" + server_message
    # Add Request ID
    request_id = str(response.headers.get(X_REQUEST_ID, ""))
    if request_id:
        request_id_message = f" (Request ID: {request_id})"
    else:
        # Fallback to X-Amzn-Trace-Id
        request_id = str(response.headers.get(X_AMZN_TRACE_ID, ""))
        if request_id:
            request_id_message = f" (Amzn Trace ID: {request_id})"
    # Insert the request id at the end of the first line so it stays visible even
    # when the message spans multiple lines.
    if request_id and request_id.lower() not in final_error_message.lower():
        if "\n" in final_error_message:
            newline_index = final_error_message.index("\n")
            final_error_message = (
                final_error_message[:newline_index] + request_id_message + final_error_message[newline_index:]
            )
        else:
            final_error_message += request_id_message

    # Return
    return error_type(final_error_message.strip(), response=response, server_message=server_message or None)
557
+
558
+
559
def _curlify(request: requests.PreparedRequest) -> str:
    """Render a `requests.PreparedRequest` as an equivalent curl command (str).

    Used for debug purposes only.

    Implementation vendored from https://github.com/ofw/curlify/blob/master/curlify.py.
    MIT License Copyright (c) 2016 Egor.
    """
    parts: List[Tuple[Any, Any]] = [("curl", None), ("-X", request.method)]

    for header_name, header_value in sorted(request.headers.items()):
        # Never leak credentials in debug output, whatever the auth scheme.
        if header_name.lower() == "authorization":
            header_value = "<TOKEN>"
        parts.append(("-H", "{0}: {1}".format(header_name, header_value)))

    if request.body:
        body = request.body
        if isinstance(body, bytes):
            body = body.decode("utf-8", errors="ignore")
        elif hasattr(body, "read"):
            body = "<file-like object>"  # Don't try to read it to avoid consuming the stream
        if len(body) > 1000:
            body = body[:1000] + " ... [truncated]"
        parts.append(("-d", body.replace("\n", "")))

    parts.append((None, request.url))

    tokens = []
    for flag, value in parts:
        if flag:
            tokens.append(quote(flag))
        if value:
            tokens.append(quote(value))
    return " ".join(tokens)
597
+
598
+
599
# Regex to parse HTTP Range header, e.g. "bytes=0-99", "bytes=5-" or "bytes=-100".
RANGE_REGEX = re.compile(r"^\s*bytes\s*=\s*(\d*)\s*-\s*(\d*)\s*$", re.IGNORECASE)
601
+
602
+
603
def _adjust_range_header(original_range: Optional[str], resume_size: int) -> Optional[str]:
    """
    Adjust an HTTP Range header to account for a resume position.

    Shifts the requested range forward by `resume_size` bytes so that a resumed
    download continues where the previous one stopped.
    """
    if not original_range:
        # No pre-existing range: simply request everything from the resume position.
        return f"bytes={resume_size}-"

    if "," in original_range:
        raise ValueError(f"Multiple ranges detected - {original_range!r}, not supported yet.")

    match = re.match(r"^\s*bytes\s*=\s*(\d*)\s*-\s*(\d*)\s*$", original_range, re.IGNORECASE)
    if match is None:
        raise RuntimeError(f"Invalid range format - {original_range!r}.")
    start_str, end_str = match.groups()

    if not start_str:
        # Suffix range ("bytes=-N" = last N bytes): shrink it by what was already fetched.
        if not end_str:
            raise RuntimeError(f"Invalid range format - {original_range!r}.")

        remaining = int(end_str) - resume_size
        adjusted = f"bytes=-{remaining}"
        if remaining <= 0:
            raise RuntimeError(f"Empty new range - {adjusted!r}.")
        return adjusted

    shifted_start = int(start_str) + resume_size
    if end_str:
        end_value = int(end_str)
        adjusted = f"bytes={shifted_start}-{end_value}"
        if shifted_start > end_value:
            raise RuntimeError(f"Empty new range - {adjusted!r}.")
        return adjusted

    return f"bytes={shifted_start}-"
venv/lib/python3.12/site-packages/huggingface_hub/utils/_pagination.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to handle pagination on Huggingface Hub."""
16
+
17
+ from typing import Dict, Iterable, Optional
18
+
19
+ import requests
20
+
21
+ from . import get_session, hf_raise_for_status, http_backoff, logging
22
+
23
+
24
+ logger = logging.get_logger(__name__)
25
+
26
+
27
def paginate(path: str, params: Dict, headers: Dict) -> Iterable:
    """Yield models/datasets/spaces results, transparently following pagination.

    Pagination uses the same "Link" header format as GitHub:
    - https://requests.readthedocs.io/en/latest/api/#requests.Response.links
    - https://docs.github.com/en/rest/guides/traversing-with-pagination#link-header

    The first page is fetched with the given query params; subsequent page URLs
    already embed their own query params. Retries (with backoff) are applied on
    HTTP 429 for the follow-up pages.
    """
    response = get_session().get(path, params=params, headers=headers)
    hf_raise_for_status(response)
    yield from response.json()

    while True:
        next_page = _get_next_page(response)
        if next_page is None:
            break
        logger.debug(f"Pagination detected. Requesting next page: {next_page}")
        response = http_backoff("GET", next_page, max_retries=20, retry_on_status_codes=429, headers=headers)
        hf_raise_for_status(response)
        yield from response.json()
49
+
50
+
51
def _get_next_page(response: requests.Response) -> Optional[str]:
    """Return the URL of the next page from the response's `Link` header, if any."""
    next_link = response.links.get("next", {})
    return next_link.get("url")
venv/lib/python3.12/site-packages/huggingface_hub/utils/_paths.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to handle paths in Huggingface Hub."""
16
+
17
+ from fnmatch import fnmatch
18
+ from pathlib import Path
19
+ from typing import Callable, Generator, Iterable, List, Optional, TypeVar, Union
20
+
21
+
22
+ T = TypeVar("T")
23
+
24
# Always ignore `.git` and `.cache/huggingface` folders in commits
# fnmatch-style patterns (as consumed by `filter_repo_objects`) covering the
# folder itself, the folder at any nesting depth, and all of its contents.
DEFAULT_IGNORE_PATTERNS = [
    ".git",
    ".git/*",
    "*/.git",
    "**/.git/**",
    ".cache/huggingface",
    ".cache/huggingface/*",
    "*/.cache/huggingface",
    "**/.cache/huggingface/**",
]
# Forbidden to commit these folders
FORBIDDEN_FOLDERS = [".git", ".cache"]
37
+
38
+
39
def filter_repo_objects(
    items: Iterable[T],
    *,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
    key: Optional[Callable[[T], str]] = None,
) -> Generator[T, None, None]:
    """Yield the items whose path passes the allowlist/denylist filters.

    Patterns are Unix shell-style wildcards (see the `fnmatch` stdlib module),
    NOT regular expressions. An item is yielded only if its path matches at
    least one pattern of `allow_patterns` (when provided) and matches none of
    `ignore_patterns` (when provided).

    Args:
        items (`Iterable`):
            Items to filter. Must be `str` or `Path` objects unless `key` is given.
        allow_patterns (`str` or `List[str]`, *optional*):
            Allowlist of patterns; item paths must match at least one of them.
        ignore_patterns (`str` or `List[str]`, *optional*):
            Denylist of patterns; item paths must match none of them.
        key (`Callable[[T], str]`, *optional*):
            Single-argument function extracting a path string from each item.
            If not provided, items themselves must be `str` or `Path`.

    Returns:
        Filtered items, as a generator preserving input order.

    Raises:
        :class:`ValueError`:
            If `key` is not provided and an item is not a `str` or `Path`.

    Example usage with paths:
    ```python
    >>> # Keep only non-hidden pdf files.
    >>> list(filter_repo_objects(
    ...     ["aaa.pdf", "bbb.jpg", ".ccc.pdf", ".ddd.png"],
    ...     allow_patterns=["*.pdf"],
    ...     ignore_patterns=[".*"],
    ... ))
    ["aaa.pdf"]
    ```

    Example usage with objects:
    ```python
    >>> list(filter_repo_objects(
    ...     [
    ...         CommitOperationAdd(path_or_fileobj="/tmp/aaa.pdf", path_in_repo="aaa.pdf"),
    ...         CommitOperationAdd(path_or_fileobj="/tmp/bbb.jpg", path_in_repo="bbb.jpg"),
    ...     ],
    ...     allow_patterns=["*.pdf"],
    ...     key=lambda x: x.path_in_repo,
    ... ))
    [CommitOperationAdd(path_or_fileobj="/tmp/aaa.pdf", path_in_repo="aaa.pdf")]
    ```
    """
    # Accept a single pattern as a convenience.
    if isinstance(allow_patterns, str):
        allow_patterns = [allow_patterns]
    if isinstance(ignore_patterns, str):
        ignore_patterns = [ignore_patterns]

    # "dir/" patterns are expanded to also match the directory's contents.
    if allow_patterns is not None:
        allow_patterns = [_add_wildcard_to_directories(pattern) for pattern in allow_patterns]
    if ignore_patterns is not None:
        ignore_patterns = [_add_wildcard_to_directories(pattern) for pattern in ignore_patterns]

    if key is None:
        # Items must already be paths; anything else is a caller error.
        def _default_key(obj: T) -> str:
            if isinstance(obj, (str, Path)):
                return str(obj)
            raise ValueError(f"Please provide `key` argument in `filter_repo_objects`: `{obj}` is not a string.")

        key = _default_key

    for obj in items:
        path = key(obj)

        if allow_patterns is not None and not any(fnmatch(path, pattern) for pattern in allow_patterns):
            continue  # allowlist present and nothing matched

        if ignore_patterns is not None and any(fnmatch(path, pattern) for pattern in ignore_patterns):
            continue  # denylist matched

        yield obj
136
+
137
+
138
+ def _add_wildcard_to_directories(pattern: str) -> str:
139
+ if pattern[-1] == "/":
140
+ return pattern + "*"
141
+ return pattern
venv/lib/python3.12/site-packages/huggingface_hub/utils/_safetensors.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import functools
2
+ import operator
3
+ from collections import defaultdict
4
+ from dataclasses import dataclass, field
5
+ from typing import Dict, List, Literal, Optional, Tuple
6
+
7
+
8
# Type aliases used throughout the safetensors helpers.
FILENAME_T = str
TENSOR_NAME_T = str
DTYPE_T = Literal["F64", "F32", "F16", "BF16", "I64", "I32", "I16", "I8", "U8", "BOOL"]


@dataclass
class TensorInfo:
    """Information about a single tensor stored in a safetensors file.

    For details on the safetensors format, see
    https://huggingface.co/docs/safetensors/index#format.

    Attributes:
        dtype (`str`):
            Data type of the tensor ("F64", "F32", "F16", "BF16", "I64", "I32",
            "I16", "I8", "U8", "BOOL").
        shape (`List[int]`):
            Shape of the tensor.
        data_offsets (`Tuple[int, int]`):
            Byte offsets of the tensor data in the file, as `[BEGIN, END]`.
        parameter_count (`int`):
            Number of parameters in the tensor (computed from `shape`).
    """

    dtype: DTYPE_T
    shape: List[int]
    data_offsets: Tuple[int, int]
    parameter_count: int = field(init=False)

    def __post_init__(self) -> None:
        # Product of all dimensions (https://stackoverflow.com/a/13840436);
        # reducing an empty shape raises TypeError, which we treat as a scalar.
        try:
            count = functools.reduce(operator.mul, self.shape)
        except TypeError:
            count = 1  # scalar value has no shape
        self.parameter_count = count
41
+
42
+
43
@dataclass
class SafetensorsFileMetadata:
    """Metadata for a single safetensors file hosted on the Hub.

    Returned by [`parse_safetensors_file_metadata`].

    For details on the safetensors format, see
    https://huggingface.co/docs/safetensors/index#format.

    Attributes:
        metadata (`Dict`):
            Free-form metadata contained in the file.
        tensors (`Dict[str, TensorInfo]`):
            Map from tensor name to its [`TensorInfo`].
        parameter_count (`Dict[str, int]`):
            Map from data type to the total number of parameters of that type
            (computed from `tensors`).
    """

    metadata: Dict[str, str]
    tensors: Dict[TENSOR_NAME_T, TensorInfo]
    parameter_count: Dict[DTYPE_T, int] = field(init=False)

    def __post_init__(self) -> None:
        # Aggregate the per-tensor counts by dtype.
        counts: Dict[DTYPE_T, int] = {}
        for tensor_info in self.tensors.values():
            counts[tensor_info.dtype] = counts.get(tensor_info.dtype, 0) + tensor_info.parameter_count
        self.parameter_count = counts
71
+
72
+
73
@dataclass
class SafetensorsRepoMetadata:
    """Metadata for a safetensors repo.

    A repo is considered a safetensors repo if it has either a
    'model.safetensors' weight file (non-sharded model) or a
    'model.safetensors.index.json' index file (sharded model) at its root.

    Returned by [`get_safetensors_metadata`].

    For details on the safetensors format, see
    https://huggingface.co/docs/safetensors/index#format.

    Attributes:
        metadata (`Dict`, *optional*):
            Content of the 'model.safetensors.index.json' file, if present.
            Only populated for sharded models.
        sharded (`bool`):
            Whether the repo contains a sharded model.
        weight_map (`Dict[str, str]`):
            Map from tensor name to the filename holding that tensor.
        files_metadata (`Dict[str, SafetensorsFileMetadata]`):
            Map from filename to the file's [`SafetensorsFileMetadata`].
        parameter_count (`Dict[str, int]`):
            Map from data type to the total number of parameters of that type
            (aggregated over all files).
    """

    metadata: Optional[Dict]
    sharded: bool
    weight_map: Dict[TENSOR_NAME_T, FILENAME_T]  # tensor name -> filename
    files_metadata: Dict[FILENAME_T, SafetensorsFileMetadata]  # filename -> metadata
    parameter_count: Dict[DTYPE_T, int] = field(init=False)

    def __post_init__(self) -> None:
        # Sum the per-file dtype counts into repo-wide totals.
        totals: Dict[DTYPE_T, int] = {}
        for file_metadata in self.files_metadata.values():
            for dtype, count in file_metadata.parameter_count.items():
                totals[dtype] = totals.get(dtype, 0) + count
        self.parameter_count = totals
venv/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to validate argument values in `huggingface_hub`."""
16
+
17
+ import inspect
18
+ import re
19
+ import warnings
20
+ from functools import wraps
21
+ from itertools import chain
22
+ from typing import Any, Dict
23
+
24
+ from huggingface_hub.errors import HFValidationError
25
+
26
+ from ._typing import CallableT
27
+
28
+
29
# Local pre-check of repo ids ("repo_name" or "namespace/repo_name") applied by
# `validate_repo_id` before any Hub round-trip. Note: the "--"/".." and ".git"
# rules are enforced separately, after this regex.
REPO_ID_REGEX = re.compile(
    r"""
    ^
    (\b[\w\-.]+\b/)? # optional namespace (username or organization)
    \b # starts with a word boundary
    [\w\-.]{1,96} # repo_name: alphanumeric + . _ -
    \b # ends with a word boundary
    $
    """,
    flags=re.VERBOSE,
)
40
+
41
+
42
def validate_hf_hub_args(fn: CallableT) -> CallableT:
    """Decorator validating the arguments of any public `huggingface_hub` method.

    Harmonizes argument validation across the codebase. All defined validators
    run on each call:

    - [`~utils.validate_repo_id`]: `repo_id` (and `from_id`/`to_id`) must be
      `"repo_name"` or `"namespace/repo_name"`, where the namespace is a
      username or an organization.
    - [`~utils.smoothly_deprecate_use_auth_token`]: a legacy `use_auth_token`
      kwarg is transparently forwarded as `token` (only when the wrapped
      function expects `token` but not `use_auth_token` — in practice, always
      the case in `huggingface_hub`). If both are passed, `use_auth_token` is
      ignored with a `UserWarning`.

    Example:
    ```py
    >>> from huggingface_hub.utils import validate_hf_hub_args

    >>> @validate_hf_hub_args
    ... def my_cool_method(repo_id: str):
    ...     print(repo_id)

    >>> my_cool_method(repo_id="valid_repo_id")
    valid_repo_id

    >>> my_cool_method("other..repo..id")
    huggingface_hub.utils._validators.HFValidationError: Cannot have -- or .. in repo_id: 'other..repo..id'.
    ```

    Raises:
        [`~utils.HFValidationError`]:
            If an input is not valid.
    """
    # TODO: add an argument to opt-out validation for specific argument?
    signature = inspect.signature(fn)

    # Swap `use_auth_token` -> `token` only when the wrapped function expects
    # `token` and does not itself declare `use_auth_token`.
    check_use_auth_token = "use_auth_token" not in signature.parameters and "token" in signature.parameters

    @wraps(fn)
    def _inner_fn(*args, **kwargs):
        has_token = False
        # Walk positional args (paired with parameter names) then keyword args.
        positional = zip(signature.parameters, args)
        for arg_name, arg_value in chain(positional, kwargs.items()):
            if arg_name in ("repo_id", "from_id", "to_id"):
                validate_repo_id(arg_value)
            elif arg_name == "token" and arg_value is not None:
                has_token = True

        if check_use_auth_token:
            kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)

        return fn(*args, **kwargs)

    return _inner_fn  # type: ignore
117
+
118
+
119
def validate_repo_id(repo_id: str) -> None:
    """Raise [`~utils.HFValidationError`] if `repo_id` is not a valid repo id.

    This does not replace the authoritative validation done by the Hub; it only
    catches obvious local mistakes early (e.g. passing a `repo_type` prefix
    inside `repo_id`).

    Rules:
    - Between 1 and 96 characters.
    - Either "repo_name" or "namespace/repo_name".
    - Characters: [a-zA-Z0-9], "-", "_", ".".
    - "--" and ".." are forbidden; cannot end with ".git".

    Valid: `"foo"`, `"foo/bar"`, `"123"`, `"Foo-BAR_foo.bar123"`

    Not valid: `"datasets/foo/bar"`, `".repo_id"`, `"foo--bar"`, `"foo.git"`

    Example:
    ```py
    >>> from huggingface_hub.utils import validate_repo_id
    >>> validate_repo_id(repo_id="valid_repo_id")
    >>> validate_repo_id(repo_id="other..repo..id")
    huggingface_hub.utils._validators.HFValidationError: Cannot have -- or .. in repo_id: 'other..repo..id'.
    ```

    Discussed in https://github.com/huggingface/huggingface_hub/issues/1008.
    In moon-landing (internal repository):
    - https://github.com/huggingface/moon-landing/blob/main/server/lib/Names.ts#L27
    - https://github.com/huggingface/moon-landing/blob/main/server/views/components/NewRepoForm/NewRepoForm.svelte#L138
    """
    # A Path (or anything else non-str) is typically a caller mistake.
    if not isinstance(repo_id, str):
        raise HFValidationError(f"Repo id must be a string, not {type(repo_id)}: '{repo_id}'.")

    if repo_id.count("/") > 1:
        raise HFValidationError(
            "Repo id must be in the form 'repo_name' or 'namespace/repo_name':"
            f" '{repo_id}'. Use `repo_type` argument if needed."
        )

    if REPO_ID_REGEX.match(repo_id) is None:
        raise HFValidationError(
            "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are"
            " forbidden, '-' and '.' cannot start or end the name, max length is 96:"
            f" '{repo_id}'."
        )

    if any(sequence in repo_id for sequence in ("--", "..")):
        raise HFValidationError(f"Cannot have -- or .. in repo_id: '{repo_id}'.")

    if repo_id.endswith(".git"):
        raise HFValidationError(f"Repo_id cannot end by '.git': '{repo_id}'.")
+
172
+
173
def smoothly_deprecate_use_auth_token(fn_name: str, has_token: bool, kwargs: Dict[str, Any]) -> Dict[str, Any]:
    """Forward a legacy `use_auth_token` kwarg as `token`, without warning.

    Part of the long-term migration away from `use_auth_token` in favor of the
    shorter `token` argument (see
    https://github.com/huggingface/huggingface_hub/issues/1094 and
    https://github.com/huggingface/huggingface_hub/pull/928):

    - If only `use_auth_token` is passed, its value is silently moved to the
      `token` key so existing code keeps working.
    - If both `token` and `use_auth_token` are passed, a `UserWarning` is
      emitted and the `use_auth_token` value is dropped.

    Args:
        fn_name: Name of the decorated function (used in the warning message).
        has_token: Whether a non-None `token` was already passed by the caller.
        kwargs: The caller's keyword arguments. Not mutated.

    Returns:
        A new kwargs dict with `use_auth_token` removed (and possibly
        re-injected as `token`).
    """
    new_kwargs = dict(kwargs)  # do not mutate input !

    use_auth_token = new_kwargs.pop("use_auth_token", None)  # remove from kwargs
    if use_auth_token is None:
        return new_kwargs

    if has_token:
        # Both provided: keep `token`, warn, drop `use_auth_token`.
        warnings.warn(
            "Both `token` and `use_auth_token` are passed to"
            f" `{fn_name}` with non-None values. `token` is now the"
            " preferred argument to pass a User Access Token."
            " `use_auth_token` value will be ignored."
        )
    else:
        # Only `use_auth_token` provided: forward it as `token`.
        new_kwargs["token"] = use_auth_token

    return new_kwargs
venv/lib/python3.12/site-packages/huggingface_hub/utils/_xet.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from enum import Enum
3
+ from typing import Dict, Optional
4
+
5
+ import requests
6
+
7
+ from .. import constants
8
+ from . import get_session, hf_raise_for_status, validate_hf_hub_args
9
+
10
+
11
class XetTokenType(str, Enum):
    """Scope of a Xet access token requested from the Hub: read-only or read-write."""

    READ = "read"
    WRITE = "write"


@dataclass(frozen=True)
class XetFileData:
    """Per-file Xet metadata parsed from a Hub HTTP response."""

    # Xet content hash identifying the file.
    file_hash: str
    # Hub route used to refresh the Xet connection info for this file.
    refresh_route: str


@dataclass(frozen=True)
class XetConnectionInfo:
    """Credentials and endpoint needed to talk to the Xet storage service."""

    # Access token for the Xet storage service.
    access_token: str
    # Token expiry, as Unix epoch seconds.
    expiration_unix_epoch: int
    # Base URL of the Xet storage service.
    endpoint: str
27
+
28
+
29
def parse_xet_file_data_from_response(response: requests.Response) -> Optional[XetFileData]:
    """Extract Xet file metadata from an HTTP response, if present.

    Reads the Xet hash header and the refresh route (preferring the Link-header
    entry over the plain header) from the response.

    Args:
        response (`requests.Response`):
            The HTTP response whose headers and links are inspected.

    Returns:
        `Optional[XetFileData]`:
            The file hash and refresh route, or `None` if the response is
            missing any required Xet metadata.
    """
    if response is None:
        return None

    try:
        file_hash = response.headers[constants.HUGGINGFACE_HEADER_X_XET_HASH]

        # Prefer the Link-header refresh route when the server provides one.
        link_entry = response.links.get(constants.HUGGINGFACE_HEADER_LINK_XET_AUTH_KEY)
        if link_entry is not None:
            refresh_route = link_entry["url"]
        else:
            refresh_route = response.headers[constants.HUGGINGFACE_HEADER_X_XET_REFRESH_ROUTE]
    except KeyError:
        # Any missing header/link key means the response carries no Xet metadata.
        return None

    return XetFileData(
        file_hash=file_hash,
        refresh_route=refresh_route,
    )
60
+
61
+
62
def parse_xet_connection_info_from_headers(headers: Dict[str, str]) -> Optional[XetConnectionInfo]:
    """Extract Xet connection info from HTTP headers, if present.

    Args:
        headers (`Dict`):
            HTTP headers to read the Xet metadata from.

    Returns:
        `XetConnectionInfo` or `None`:
            The endpoint, access token and expiry needed to connect to the Xet
            storage service, or `None` if a header is missing or malformed.
    """
    try:
        connection_info = XetConnectionInfo(
            endpoint=headers[constants.HUGGINGFACE_HEADER_X_XET_ENDPOINT],
            access_token=headers[constants.HUGGINGFACE_HEADER_X_XET_ACCESS_TOKEN],
            # int() may raise ValueError/TypeError on a malformed expiration.
            expiration_unix_epoch=int(headers[constants.HUGGINGFACE_HEADER_X_XET_EXPIRATION]),
        )
    except (KeyError, ValueError, TypeError):
        return None
    return connection_info
85
+
86
+
87
@validate_hf_hub_args
def refresh_xet_connection_info(
    *,
    file_data: XetFileData,
    headers: Dict[str, str],
) -> XetConnectionInfo:
    """Request fresh Xet connection info from the Hub for a given file.

    Uses the refresh route embedded in the file metadata to obtain a new access
    token, its expiration, and the Xet service URL.

    Args:
        file_data (`XetFileData`):
            Parsed file metadata holding the refresh route.
        headers (`Dict[str, str]`):
            Request headers, including authorization and user agent.

    Returns:
        `XetConnectionInfo`:
            Connection info for the Xet storage service.

    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If no refresh route is available or the response is improperly formatted.
    """
    refresh_url = file_data.refresh_route
    if refresh_url is None:
        raise ValueError("The provided xet metadata does not contain a refresh endpoint.")
    return _fetch_xet_connection_info_with_url(refresh_url, headers)
113
+
114
+
115
@validate_hf_hub_args
def fetch_xet_connection_info_from_repo_info(
    *,
    token_type: XetTokenType,
    repo_id: str,
    repo_type: str,
    revision: Optional[str] = None,
    headers: Dict[str, str],
    endpoint: Optional[str] = None,
    params: Optional[Dict[str, str]] = None,
) -> XetConnectionInfo:
    """Request a Xet access token from the Hub for a given repo.

    Args:
        token_type (`XetTokenType`):
            Token scope to request: `"read"` or `"write"`.
        repo_id (`str`):
            Namespace (user or organization) and repo name separated by a `/`.
        repo_type (`str`):
            Repo type: `"model"`, `"dataset"` or `"space"`.
        revision (`str`, `optional`):
            Revision of the repo to get the token for.
        headers (`Dict[str, str]`):
            Request headers, including authorization and user agent.
        endpoint (`str`, `optional`):
            Hub endpoint to use. Defaults to the configured Hub endpoint.
        params (`Dict[str, str]`, `optional`):
            Extra query parameters for the request.

    Returns:
        `XetConnectionInfo`:
            Connection info for the Xet storage service.

    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the Hub API response is improperly formatted.
    """
    base_endpoint = constants.ENDPOINT if endpoint is None else endpoint
    # NOTE(review): when `revision` is None the URL ends in ".../None" —
    # presumably the Hub resolves that server-side; confirm against the API.
    url = f"{base_endpoint}/api/{repo_type}s/{repo_id}/xet-{token_type.value}-token/{revision}"
    return _fetch_xet_connection_info_with_url(url, headers, params)
155
+
156
+
157
@validate_hf_hub_args
def _fetch_xet_connection_info_with_url(
    url: str,
    headers: Dict[str, str],
    params: Optional[Dict[str, str]] = None,
) -> XetConnectionInfo:
    """Fetch Xet connection info (token, expiry, endpoint) from a token URL.

    Args:
        url: (`str`):
            The access-token endpoint URL.
        headers (`Dict[str, str]`):
            Request headers, including authorization and user agent.
        params (`Dict[str, str]`, `optional`):
            Extra query parameters for the request.

    Returns:
        `XetConnectionInfo`:
            Connection info for the Xet storage service.

    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the Hub API response is improperly formatted.
    """
    response = get_session().get(url=url, headers=headers, params=params)
    hf_raise_for_status(response)

    connection_info = parse_xet_connection_info_from_headers(response.headers)  # type: ignore
    if connection_info is None:
        raise ValueError("Xet headers have not been correctly set by the server.")
    return connection_info
venv/lib/python3.12/site-packages/huggingface_hub/utils/insecure_hashlib.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Taken from https://github.com/mlflow/mlflow/pull/10119
2
+ #
3
+ # DO NOT use this function for security purposes (e.g., password hashing).
4
+ #
5
+ # In Python >= 3.9, insecure hashing algorithms such as MD5 fail in FIPS-compliant
6
+ # environments unless `usedforsecurity=False` is explicitly passed.
7
+ #
8
+ # References:
9
+ # - https://github.com/mlflow/mlflow/issues/9905
10
+ # - https://github.com/mlflow/mlflow/pull/10119
11
+ # - https://docs.python.org/3/library/hashlib.html
12
+ # - https://github.com/huggingface/transformers/pull/27038
13
+ #
14
+ # Usage:
15
+ # ```python
16
+ # # Use
17
+ # from huggingface_hub.utils.insecure_hashlib import sha256
18
+ # # instead of
19
+ # from hashlib import sha256
20
+ #
21
+ # # Use
22
+ # from huggingface_hub.utils import insecure_hashlib
23
+ # # instead of
24
+ # import hashlib
25
+ # ```
26
+ import functools
27
+ import hashlib
28
+ import sys
29
+
30
+
31
# `usedforsecurity` exists only from Python 3.9; per the header comment above,
# it lets md5/sha1/sha256 work in FIPS-compliant environments when the hashes
# are not used for security purposes.
if sys.version_info >= (3, 9):
    md5 = functools.partial(hashlib.md5, usedforsecurity=False)
    sha1 = functools.partial(hashlib.sha1, usedforsecurity=False)
    sha256 = functools.partial(hashlib.sha256, usedforsecurity=False)
else:
    # Older Pythons: plain constructors (no `usedforsecurity` flag available).
    md5 = hashlib.md5
    sha1 = hashlib.sha1
    sha256 = hashlib.sha256
venv/lib/python3.12/site-packages/huggingface_hub/utils/logging.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2020 Optuna, Hugging Face
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Logging utilities."""
16
+
17
+ import logging
18
+ import os
19
+ from logging import (
20
+ CRITICAL, # NOQA
21
+ DEBUG, # NOQA
22
+ ERROR, # NOQA
23
+ FATAL, # NOQA
24
+ INFO, # NOQA
25
+ NOTSET, # NOQA
26
+ WARN, # NOQA
27
+ WARNING, # NOQA
28
+ )
29
+ from typing import Optional
30
+
31
+ from .. import constants
32
+
33
+
34
+ log_levels = {
35
+ "debug": logging.DEBUG,
36
+ "info": logging.INFO,
37
+ "warning": logging.WARNING,
38
+ "error": logging.ERROR,
39
+ "critical": logging.CRITICAL,
40
+ }
41
+
42
+ _default_log_level = logging.WARNING
43
+
44
+
45
+ def _get_library_name() -> str:
46
+ return __name__.split(".")[0]
47
+
48
+
49
+ def _get_library_root_logger() -> logging.Logger:
50
+ return logging.getLogger(_get_library_name())
51
+
52
+
53
+ def _get_default_logging_level():
54
+ """
55
+ If `HF_HUB_VERBOSITY` env var is set to one of the valid choices return that as the new default level. If it is not
56
+ - fall back to `_default_log_level`
57
+ """
58
+ env_level_str = os.getenv("HF_HUB_VERBOSITY", None)
59
+ if env_level_str:
60
+ if env_level_str in log_levels:
61
+ return log_levels[env_level_str]
62
+ else:
63
+ logging.getLogger().warning(
64
+ f"Unknown option HF_HUB_VERBOSITY={env_level_str}, has to be one of: {', '.join(log_levels.keys())}"
65
+ )
66
+ return _default_log_level
67
+
68
+
69
+ def _configure_library_root_logger() -> None:
70
+ library_root_logger = _get_library_root_logger()
71
+ library_root_logger.addHandler(logging.StreamHandler())
72
+ library_root_logger.setLevel(_get_default_logging_level())
73
+
74
+
75
+ def _reset_library_root_logger() -> None:
76
+ library_root_logger = _get_library_root_logger()
77
+ library_root_logger.setLevel(logging.NOTSET)
78
+
79
+
80
+ def get_logger(name: Optional[str] = None) -> logging.Logger:
81
+ """
82
+ Returns a logger with the specified name. This function is not supposed
83
+ to be directly accessed by library users.
84
+
85
+ Args:
86
+ name (`str`, *optional*):
87
+ The name of the logger to get, usually the filename
88
+
89
+ Example:
90
+
91
+ ```python
92
+ >>> from huggingface_hub import get_logger
93
+
94
+ >>> logger = get_logger(__file__)
95
+ >>> logger.set_verbosity_info()
96
+ ```
97
+ """
98
+
99
+ if name is None:
100
+ name = _get_library_name()
101
+
102
+ return logging.getLogger(name)
103
+
104
+
105
+ def get_verbosity() -> int:
106
+ """Return the current level for the HuggingFace Hub's root logger.
107
+
108
+ Returns:
109
+ Logging level, e.g., `huggingface_hub.logging.DEBUG` and
110
+ `huggingface_hub.logging.INFO`.
111
+
112
+ <Tip>
113
+
114
+ HuggingFace Hub has following logging levels:
115
+
116
+ - `huggingface_hub.logging.CRITICAL`, `huggingface_hub.logging.FATAL`
117
+ - `huggingface_hub.logging.ERROR`
118
+ - `huggingface_hub.logging.WARNING`, `huggingface_hub.logging.WARN`
119
+ - `huggingface_hub.logging.INFO`
120
+ - `huggingface_hub.logging.DEBUG`
121
+
122
+ </Tip>
123
+ """
124
+ return _get_library_root_logger().getEffectiveLevel()
125
+
126
+
127
+ def set_verbosity(verbosity: int) -> None:
128
+ """
129
+ Sets the level for the HuggingFace Hub's root logger.
130
+
131
+ Args:
132
+ verbosity (`int`):
133
+ Logging level, e.g., `huggingface_hub.logging.DEBUG` and
134
+ `huggingface_hub.logging.INFO`.
135
+ """
136
+ _get_library_root_logger().setLevel(verbosity)
137
+
138
+
139
+ def set_verbosity_info():
140
+ """
141
+ Sets the verbosity to `logging.INFO`.
142
+ """
143
+ return set_verbosity(INFO)
144
+
145
+
146
+ def set_verbosity_warning():
147
+ """
148
+ Sets the verbosity to `logging.WARNING`.
149
+ """
150
+ return set_verbosity(WARNING)
151
+
152
+
153
+ def set_verbosity_debug():
154
+ """
155
+ Sets the verbosity to `logging.DEBUG`.
156
+ """
157
+ return set_verbosity(DEBUG)
158
+
159
+
160
+ def set_verbosity_error():
161
+ """
162
+ Sets the verbosity to `logging.ERROR`.
163
+ """
164
+ return set_verbosity(ERROR)
165
+
166
+
167
+ def disable_propagation() -> None:
168
+ """
169
+ Disable propagation of the library log outputs. Note that log propagation is
170
+ disabled by default.
171
+ """
172
+ _get_library_root_logger().propagate = False
173
+
174
+
175
+ def enable_propagation() -> None:
176
+ """
177
+ Enable propagation of the library log outputs. Please disable the
178
+ HuggingFace Hub's default handler to prevent double logging if the root
179
+ logger has been configured.
180
+ """
181
+ _get_library_root_logger().propagate = True
182
+
183
+
184
+ _configure_library_root_logger()
185
+
186
+ if constants.HF_DEBUG:
187
+ # If `HF_DEBUG` environment variable is set, set the verbosity of `huggingface_hub` logger to `DEBUG`.
188
+ set_verbosity_debug()
venv/lib/python3.12/site-packages/huggingface_hub/utils/sha.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utilities to efficiently compute the SHA 256 hash of a bunch of bytes."""
2
+
3
+ from typing import BinaryIO, Optional
4
+
5
+ from .insecure_hashlib import sha1, sha256
6
+
7
+
8
+ def sha_fileobj(fileobj: BinaryIO, chunk_size: Optional[int] = None) -> bytes:
9
+ """
10
+ Computes the sha256 hash of the given file object, by chunks of size `chunk_size`.
11
+
12
+ Args:
13
+ fileobj (file-like object):
14
+ The File object to compute sha256 for, typically obtained with `open(path, "rb")`
15
+ chunk_size (`int`, *optional*):
16
+ The number of bytes to read from `fileobj` at once, defaults to 1MB.
17
+
18
+ Returns:
19
+ `bytes`: `fileobj`'s sha256 hash as bytes
20
+ """
21
+ chunk_size = chunk_size if chunk_size is not None else 1024 * 1024
22
+
23
+ sha = sha256()
24
+ while True:
25
+ chunk = fileobj.read(chunk_size)
26
+ sha.update(chunk)
27
+ if not chunk:
28
+ break
29
+ return sha.digest()
30
+
31
+
32
+ def git_hash(data: bytes) -> str:
33
+ """
34
+ Computes the git-sha1 hash of the given bytes, using the same algorithm as git.
35
+
36
+ This is equivalent to running `git hash-object`. See https://git-scm.com/docs/git-hash-object
37
+ for more details.
38
+
39
+ Note: this method is valid for regular files. For LFS files, the proper git hash is supposed to be computed on the
40
+ pointer file content, not the actual file content. However, for simplicity, we directly compare the sha256 of
41
+ the LFS file content when we want to compare LFS files.
42
+
43
+ Args:
44
+ data (`bytes`):
45
+ The data to compute the git-hash for.
46
+
47
+ Returns:
48
+ `str`: the git-hash of `data` as an hexadecimal string.
49
+
50
+ Example:
51
+ ```python
52
+ >>> from huggingface_hub.utils.sha import git_hash
53
+ >>> git_hash(b"Hello, World!")
54
+ 'b45ef6fec89518d314f546fd6c3025367b721684'
55
+ ```
56
+ """
57
+ # Taken from https://gist.github.com/msabramo/763200
58
+ # Note: no need to optimize by reading the file in chunks as we're not supposed to hash huge files (5MB maximum).
59
+ sha = sha1()
60
+ sha.update(b"blob ")
61
+ sha.update(str(len(data)).encode())
62
+ sha.update(b"\0")
63
+ sha.update(data)
64
+ return sha.hexdigest()
venv/lib/python3.12/site-packages/idna/__pycache__/core.cpython-312.pyc ADDED
Binary file (16.2 kB). View file
 
venv/lib/python3.12/site-packages/idna/__pycache__/idnadata.cpython-312.pyc ADDED
Binary file (99.5 kB). View file
 
venv/lib/python3.12/site-packages/packaging-25.0.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ This software is made available under the terms of *either* of the licenses
2
+ found in LICENSE.APACHE or LICENSE.BSD. Contributions to this software is made
3
+ under the terms of *both* these licenses.
venv/lib/python3.12/site-packages/packaging-25.0.dist-info/licenses/LICENSE.APACHE ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
5
+
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
+
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
venv/lib/python3.12/site-packages/packaging-25.0.dist-info/licenses/LICENSE.BSD ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) Donald Stufft and individual contributors.
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ 1. Redistributions of source code must retain the above copyright notice,
8
+ this list of conditions and the following disclaimer.
9
+
10
+ 2. Redistributions in binary form must reproduce the above copyright
11
+ notice, this list of conditions and the following disclaimer in the
12
+ documentation and/or other materials provided with the distribution.
13
+
14
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
venv/lib/python3.12/site-packages/pip/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (682 Bytes). View file
 
venv/lib/python3.12/site-packages/pip/__pycache__/__main__.cpython-312.pyc ADDED
Binary file (838 Bytes). View file
 
venv/lib/python3.12/site-packages/pip/__pycache__/__pip-runner__.cpython-312.pyc ADDED
Binary file (2.2 kB). View file
 
venv/lib/python3.12/site-packages/pip/_internal/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional
2
+
3
+ from pip._internal.utils import _log
4
+
5
+ # init_logging() must be called before any call to logging.getLogger()
6
+ # which happens at import of most modules.
7
+ _log.init_logging()
8
+
9
+
10
+ def main(args: (Optional[List[str]]) = None) -> int:
11
+ """This is preserved for old console scripts that may still be referencing
12
+ it.
13
+
14
+ For additional details, see https://github.com/pypa/pip/issues/7498.
15
+ """
16
+ from pip._internal.utils.entrypoints import _wrapper
17
+
18
+ return _wrapper(args)
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (784 Bytes). View file
 
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/build_env.cpython-312.pyc ADDED
Binary file (14.3 kB). View file
 
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/cache.cpython-312.pyc ADDED
Binary file (12.7 kB). View file
 
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/configuration.cpython-312.pyc ADDED
Binary file (17.7 kB). View file
 
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/exceptions.cpython-312.pyc ADDED
Binary file (33.3 kB). View file
 
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/main.cpython-312.pyc ADDED
Binary file (667 Bytes). View file
 
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/pyproject.cpython-312.pyc ADDED
Binary file (4.97 kB). View file
 
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/self_outdated_check.cpython-312.pyc ADDED
Binary file (10.5 kB). View file
 
venv/lib/python3.12/site-packages/pip/_internal/__pycache__/wheel_builder.cpython-312.pyc ADDED
Binary file (13.6 kB). View file
 
venv/lib/python3.12/site-packages/pip/_internal/build_env.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Build Environment used for isolation during sdist building
2
+ """
3
+
4
+ import logging
5
+ import os
6
+ import pathlib
7
+ import site
8
+ import sys
9
+ import textwrap
10
+ from collections import OrderedDict
11
+ from types import TracebackType
12
+ from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple, Type, Union
13
+
14
+ from pip._vendor.certifi import where
15
+ from pip._vendor.packaging.requirements import Requirement
16
+ from pip._vendor.packaging.version import Version
17
+
18
+ from pip import __file__ as pip_location
19
+ from pip._internal.cli.spinners import open_spinner
20
+ from pip._internal.locations import get_platlib, get_purelib, get_scheme
21
+ from pip._internal.metadata import get_default_environment, get_environment
22
+ from pip._internal.utils.subprocess import call_subprocess
23
+ from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
24
+
25
+ if TYPE_CHECKING:
26
+ from pip._internal.index.package_finder import PackageFinder
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
+ def _dedup(a: str, b: str) -> Union[Tuple[str], Tuple[str, str]]:
32
+ return (a, b) if a != b else (a,)
33
+
34
+
35
+ class _Prefix:
36
+ def __init__(self, path: str) -> None:
37
+ self.path = path
38
+ self.setup = False
39
+ scheme = get_scheme("", prefix=path)
40
+ self.bin_dir = scheme.scripts
41
+ self.lib_dirs = _dedup(scheme.purelib, scheme.platlib)
42
+
43
+
44
+ def get_runnable_pip() -> str:
45
+ """Get a file to pass to a Python executable, to run the currently-running pip.
46
+
47
+ This is used to run a pip subprocess, for installing requirements into the build
48
+ environment.
49
+ """
50
+ source = pathlib.Path(pip_location).resolve().parent
51
+
52
+ if not source.is_dir():
53
+ # This would happen if someone is using pip from inside a zip file. In that
54
+ # case, we can use that directly.
55
+ return str(source)
56
+
57
+ return os.fsdecode(source / "__pip-runner__.py")
58
+
59
+
60
+ def _get_system_sitepackages() -> Set[str]:
61
+ """Get system site packages
62
+
63
+ Usually from site.getsitepackages,
64
+ but fallback on `get_purelib()/get_platlib()` if unavailable
65
+ (e.g. in a virtualenv created by virtualenv<20)
66
+
67
+ Returns normalized set of strings.
68
+ """
69
+ if hasattr(site, "getsitepackages"):
70
+ system_sites = site.getsitepackages()
71
+ else:
72
+ # virtualenv < 20 overwrites site.py without getsitepackages
73
+ # fallback on get_purelib/get_platlib.
74
+ # this is known to miss things, but shouldn't in the cases
75
+ # where getsitepackages() has been removed (inside a virtualenv)
76
+ system_sites = [get_purelib(), get_platlib()]
77
+ return {os.path.normcase(path) for path in system_sites}
78
+
79
+
80
+ class BuildEnvironment:
81
+ """Creates and manages an isolated environment to install build deps"""
82
+
83
+ def __init__(self) -> None:
84
+ temp_dir = TempDirectory(kind=tempdir_kinds.BUILD_ENV, globally_managed=True)
85
+
86
+ self._prefixes = OrderedDict(
87
+ (name, _Prefix(os.path.join(temp_dir.path, name)))
88
+ for name in ("normal", "overlay")
89
+ )
90
+
91
+ self._bin_dirs: List[str] = []
92
+ self._lib_dirs: List[str] = []
93
+ for prefix in reversed(list(self._prefixes.values())):
94
+ self._bin_dirs.append(prefix.bin_dir)
95
+ self._lib_dirs.extend(prefix.lib_dirs)
96
+
97
+ # Customize site to:
98
+ # - ensure .pth files are honored
99
+ # - prevent access to system site packages
100
+ system_sites = _get_system_sitepackages()
101
+
102
+ self._site_dir = os.path.join(temp_dir.path, "site")
103
+ if not os.path.exists(self._site_dir):
104
+ os.mkdir(self._site_dir)
105
+ with open(
106
+ os.path.join(self._site_dir, "sitecustomize.py"), "w", encoding="utf-8"
107
+ ) as fp:
108
+ fp.write(
109
+ textwrap.dedent(
110
+ """
111
+ import os, site, sys
112
+
113
+ # First, drop system-sites related paths.
114
+ original_sys_path = sys.path[:]
115
+ known_paths = set()
116
+ for path in {system_sites!r}:
117
+ site.addsitedir(path, known_paths=known_paths)
118
+ system_paths = set(
119
+ os.path.normcase(path)
120
+ for path in sys.path[len(original_sys_path):]
121
+ )
122
+ original_sys_path = [
123
+ path for path in original_sys_path
124
+ if os.path.normcase(path) not in system_paths
125
+ ]
126
+ sys.path = original_sys_path
127
+
128
+ # Second, add lib directories.
129
+ # ensuring .pth file are processed.
130
+ for path in {lib_dirs!r}:
131
+ assert not path in sys.path
132
+ site.addsitedir(path)
133
+ """
134
+ ).format(system_sites=system_sites, lib_dirs=self._lib_dirs)
135
+ )
136
+
137
+ def __enter__(self) -> None:
138
+ self._save_env = {
139
+ name: os.environ.get(name, None)
140
+ for name in ("PATH", "PYTHONNOUSERSITE", "PYTHONPATH")
141
+ }
142
+
143
+ path = self._bin_dirs[:]
144
+ old_path = self._save_env["PATH"]
145
+ if old_path:
146
+ path.extend(old_path.split(os.pathsep))
147
+
148
+ pythonpath = [self._site_dir]
149
+
150
+ os.environ.update(
151
+ {
152
+ "PATH": os.pathsep.join(path),
153
+ "PYTHONNOUSERSITE": "1",
154
+ "PYTHONPATH": os.pathsep.join(pythonpath),
155
+ }
156
+ )
157
+
158
+ def __exit__(
159
+ self,
160
+ exc_type: Optional[Type[BaseException]],
161
+ exc_val: Optional[BaseException],
162
+ exc_tb: Optional[TracebackType],
163
+ ) -> None:
164
+ for varname, old_value in self._save_env.items():
165
+ if old_value is None:
166
+ os.environ.pop(varname, None)
167
+ else:
168
+ os.environ[varname] = old_value
169
+
170
+ def check_requirements(
171
+ self, reqs: Iterable[str]
172
+ ) -> Tuple[Set[Tuple[str, str]], Set[str]]:
173
+ """Return 2 sets:
174
+ - conflicting requirements: set of (installed, wanted) reqs tuples
175
+ - missing requirements: set of reqs
176
+ """
177
+ missing = set()
178
+ conflicting = set()
179
+ if reqs:
180
+ env = (
181
+ get_environment(self._lib_dirs)
182
+ if hasattr(self, "_lib_dirs")
183
+ else get_default_environment()
184
+ )
185
+ for req_str in reqs:
186
+ req = Requirement(req_str)
187
+ # We're explicitly evaluating with an empty extra value, since build
188
+ # environments are not provided any mechanism to select specific extras.
189
+ if req.marker is not None and not req.marker.evaluate({"extra": ""}):
190
+ continue
191
+ dist = env.get_distribution(req.name)
192
+ if not dist:
193
+ missing.add(req_str)
194
+ continue
195
+ if isinstance(dist.version, Version):
196
+ installed_req_str = f"{req.name}=={dist.version}"
197
+ else:
198
+ installed_req_str = f"{req.name}==={dist.version}"
199
+ if not req.specifier.contains(dist.version, prereleases=True):
200
+ conflicting.add((installed_req_str, req_str))
201
+ # FIXME: Consider direct URL?
202
+ return conflicting, missing
203
+
204
+ def install_requirements(
205
+ self,
206
+ finder: "PackageFinder",
207
+ requirements: Iterable[str],
208
+ prefix_as_string: str,
209
+ *,
210
+ kind: str,
211
+ ) -> None:
212
+ prefix = self._prefixes[prefix_as_string]
213
+ assert not prefix.setup
214
+ prefix.setup = True
215
+ if not requirements:
216
+ return
217
+ self._install_requirements(
218
+ get_runnable_pip(),
219
+ finder,
220
+ requirements,
221
+ prefix,
222
+ kind=kind,
223
+ )
224
+
225
+ @staticmethod
226
+ def _install_requirements(
227
+ pip_runnable: str,
228
+ finder: "PackageFinder",
229
+ requirements: Iterable[str],
230
+ prefix: _Prefix,
231
+ *,
232
+ kind: str,
233
+ ) -> None:
234
+ args: List[str] = [
235
+ sys.executable,
236
+ pip_runnable,
237
+ "install",
238
+ "--ignore-installed",
239
+ "--no-user",
240
+ "--prefix",
241
+ prefix.path,
242
+ "--no-warn-script-location",
243
+ ]
244
+ if logger.getEffectiveLevel() <= logging.DEBUG:
245
+ args.append("-v")
246
+ for format_control in ("no_binary", "only_binary"):
247
+ formats = getattr(finder.format_control, format_control)
248
+ args.extend(
249
+ (
250
+ "--" + format_control.replace("_", "-"),
251
+ ",".join(sorted(formats or {":none:"})),
252
+ )
253
+ )
254
+
255
+ index_urls = finder.index_urls
256
+ if index_urls:
257
+ args.extend(["-i", index_urls[0]])
258
+ for extra_index in index_urls[1:]:
259
+ args.extend(["--extra-index-url", extra_index])
260
+ else:
261
+ args.append("--no-index")
262
+ for link in finder.find_links:
263
+ args.extend(["--find-links", link])
264
+
265
+ for host in finder.trusted_hosts:
266
+ args.extend(["--trusted-host", host])
267
+ if finder.allow_all_prereleases:
268
+ args.append("--pre")
269
+ if finder.prefer_binary:
270
+ args.append("--prefer-binary")
271
+ args.append("--")
272
+ args.extend(requirements)
273
+ extra_environ = {"_PIP_STANDALONE_CERT": where()}
274
+ with open_spinner(f"Installing {kind}") as spinner:
275
+ call_subprocess(
276
+ args,
277
+ command_desc=f"pip subprocess to install {kind}",
278
+ spinner=spinner,
279
+ extra_environ=extra_environ,
280
+ )
281
+
282
+
283
+ class NoOpBuildEnvironment(BuildEnvironment):
284
+ """A no-op drop-in replacement for BuildEnvironment"""
285
+
286
+ def __init__(self) -> None:
287
+ pass
288
+
289
+ def __enter__(self) -> None:
290
+ pass
291
+
292
+ def __exit__(
293
+ self,
294
+ exc_type: Optional[Type[BaseException]],
295
+ exc_val: Optional[BaseException],
296
+ exc_tb: Optional[TracebackType],
297
+ ) -> None:
298
+ pass
299
+
300
+ def cleanup(self) -> None:
301
+ pass
302
+
303
+ def install_requirements(
304
+ self,
305
+ finder: "PackageFinder",
306
+ requirements: Iterable[str],
307
+ prefix_as_string: str,
308
+ *,
309
+ kind: str,
310
+ ) -> None:
311
+ raise NotImplementedError()