Add files using upload-large-folder tool
This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
- .gitattributes +33 -0
- 17.BangBros.Halloween Special With A Threesome.mih16199.mp4 +3 -0
- 17.BangBros.Stalking Pussy.bbe16045.mp4 +3 -0
- 17.Clips4Sale.Cory's Super Heroine Adventures.Super Spy vs Super Villain.mp4 +3 -0
- 17.DevilsFilm.Mommy Likes To Watch 2.mp4 +3 -0
- 17.DigitalPlayground.When College Girls Attack.mp4 +3 -0
- 17.EvilAngel.Porn Pro Drills 20YO Rim Jobber Kenzie.mp4 +3 -0
- 17.GirlsWay.Biology Exam.mp4 +3 -0
- 17.LoveHerFeet.Multiplayer.mp4 +3 -0
- 17.Mofos.Spinner Sucks Cock for Fame.mp4 +3 -0
- 17.NewSensations.Sexy Little Sister Kenzie Wants To Do It.mp4 +3 -0
- 18.Brazzers.Selfies With The Dean.mp4 +3 -0
- 18.BurningAngel.Human Pinata.mp4 +3 -0
- 18.CherryPimps.Wild Girls Kenzie Reeves And Vina Sky LIVE.mp4 +3 -0
- 18.Down The Hatch 29.mp4 +3 -0
- 18.GirlsWay.I Love You Both.mp4 +3 -0
- 18.GirlsWay.I Love You Too.mp4 +3 -0
- 18.GirlsWay.Peer Pressure - The Sharing Sisters.mp4 +3 -0
- 18.LesbianX.Anal Gaping Slumber Party.mp4 +3 -0
- 18.MissaX.In Love With Daddy.mp4 +3 -0
- 18.Mofos.A Sneaky Threesome Situation.mp4 +3 -0
- 18.Mofos.Teen Tastes Horny Stepmom's Pussy.mp4 +3 -0
- 18.Nubiles-Porn.Spring Break Lake Powell 1.mp4 +3 -0
- 18.PervCity.Tiny Blonde Teen Kenzie Reeves Has A Big Orgasm.mp4 +3 -0
- 18.PornFidelity.Losing It.mp4 +3 -0
- 18.PropertySex.VixenX.Been a While.mp4 +3 -0
- 18.PureTaboo.Trailer Park Taboo 3.mp4 +3 -0
- 18.RealityKings.Social Proof.mp4 +3 -0
- 19.Analized.Is The Ultimate Teenage Anal Slut.mp4 +3 -0
- 19.ArchangelVideo.My Pussy Is Dripping.mp4 +3 -0
- 19.BlowPass.Kenzie Loves Cock.mp4 +3 -0
- 19.Deeper.Dare You.101231.mp4 +3 -0
- 19.DogFartNetwork.BlackMeatWhiteFeet.And Big Black Cock.mp4 +3 -0
- 19.ElegantAngel.It'S A Family Thing 3.mp4 +3 -0
- venv/lib/python3.12/site-packages/filelock-3.18.0.dist-info/licenses/LICENSE +24 -0
- venv/lib/python3.12/site-packages/fsspec/__init__.py +71 -0
- venv/lib/python3.12/site-packages/fsspec/_version.py +21 -0
- venv/lib/python3.12/site-packages/fsspec/archive.py +75 -0
- venv/lib/python3.12/site-packages/fsspec/asyn.py +1110 -0
- venv/lib/python3.12/site-packages/fsspec/caching.py +1005 -0
- venv/lib/python3.12/site-packages/fsspec/callbacks.py +324 -0
- venv/lib/python3.12/site-packages/fsspec/compression.py +175 -0
- venv/lib/python3.12/site-packages/fsspec/config.py +131 -0
- venv/lib/python3.12/site-packages/fsspec/conftest.py +55 -0
- venv/lib/python3.12/site-packages/fsspec/core.py +743 -0
- venv/lib/python3.12/site-packages/fsspec/dircache.py +98 -0
- venv/lib/python3.12/site-packages/fsspec/exceptions.py +18 -0
- venv/lib/python3.12/site-packages/fsspec/fuse.py +324 -0
- venv/lib/python3.12/site-packages/fsspec/generic.py +395 -0
- venv/lib/python3.12/site-packages/fsspec/gui.py +416 -0
.gitattributes
CHANGED
@@ -103,3 +103,36 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 18.Clips4Sale.Primal's[[:space:]]Taboo[[:space:]]Sex.The[[:space:]]Sister[[:space:]]That[[:space:]]Got[[:space:]]What[[:space:]]She[[:space:]]Wanted.mp4 filter=lfs diff=lfs merge=lfs -text
 18.POVD.Thanksgiving[[:space:]]Creampie.mp4 filter=lfs diff=lfs merge=lfs -text
 18.NewSensations.Kenzie[[:space:]]Waits[[:space:]]For[[:space:]]Daddy[[:space:]]To[[:space:]]Cum.mp4 filter=lfs diff=lfs merge=lfs -text
+18.Nubiles-Porn.Spring[[:space:]]Break[[:space:]]Lake[[:space:]]Powell[[:space:]]1.mp4 filter=lfs diff=lfs merge=lfs -text
+18.Down[[:space:]]The[[:space:]]Hatch[[:space:]]29.mp4 filter=lfs diff=lfs merge=lfs -text
+19.ElegantAngel.It'S[[:space:]]A[[:space:]]Family[[:space:]]Thing[[:space:]]3.mp4 filter=lfs diff=lfs merge=lfs -text
+19.BlowPass.Kenzie[[:space:]]Loves[[:space:]]Cock.mp4 filter=lfs diff=lfs merge=lfs -text
+18.MissaX.In[[:space:]]Love[[:space:]]With[[:space:]]Daddy.mp4 filter=lfs diff=lfs merge=lfs -text
+18.CherryPimps.Wild[[:space:]]Girls[[:space:]]Kenzie[[:space:]]Reeves[[:space:]]And[[:space:]]Vina[[:space:]]Sky[[:space:]]LIVE.mp4 filter=lfs diff=lfs merge=lfs -text
+17.EvilAngel.Porn[[:space:]]Pro[[:space:]]Drills[[:space:]]20YO[[:space:]]Rim[[:space:]]Jobber[[:space:]]Kenzie.mp4 filter=lfs diff=lfs merge=lfs -text
+18.GirlsWay.Peer[[:space:]]Pressure[[:space:]]-[[:space:]]The[[:space:]]Sharing[[:space:]]Sisters.mp4 filter=lfs diff=lfs merge=lfs -text
+19.ArchangelVideo.My[[:space:]]Pussy[[:space:]]Is[[:space:]]Dripping.mp4 filter=lfs diff=lfs merge=lfs -text
+18.Mofos.Teen[[:space:]]Tastes[[:space:]]Horny[[:space:]]Stepmom's[[:space:]]Pussy.mp4 filter=lfs diff=lfs merge=lfs -text
+19.Analized.Is[[:space:]]The[[:space:]]Ultimate[[:space:]]Teenage[[:space:]]Anal[[:space:]]Slut.mp4 filter=lfs diff=lfs merge=lfs -text
+17.GirlsWay.Biology[[:space:]]Exam.mp4 filter=lfs diff=lfs merge=lfs -text
+18.GirlsWay.I[[:space:]]Love[[:space:]]You[[:space:]]Too.mp4 filter=lfs diff=lfs merge=lfs -text
+18.Brazzers.Selfies[[:space:]]With[[:space:]]The[[:space:]]Dean.mp4 filter=lfs diff=lfs merge=lfs -text
+18.PornFidelity.Losing[[:space:]]It.mp4 filter=lfs diff=lfs merge=lfs -text
+18.PropertySex.VixenX.Been[[:space:]]a[[:space:]]While.mp4 filter=lfs diff=lfs merge=lfs -text
+18.RealityKings.Social[[:space:]]Proof.mp4 filter=lfs diff=lfs merge=lfs -text
+19.DogFartNetwork.BlackMeatWhiteFeet.And[[:space:]]Big[[:space:]]Black[[:space:]]Cock.mp4 filter=lfs diff=lfs merge=lfs -text
+17.NewSensations.Sexy[[:space:]]Little[[:space:]]Sister[[:space:]]Kenzie[[:space:]]Wants[[:space:]]To[[:space:]]Do[[:space:]]It.mp4 filter=lfs diff=lfs merge=lfs -text
+17.BangBros.Stalking[[:space:]]Pussy.bbe16045.mp4 filter=lfs diff=lfs merge=lfs -text
+18.BurningAngel.Human[[:space:]]Pinata.mp4 filter=lfs diff=lfs merge=lfs -text
+17.DigitalPlayground.When[[:space:]]College[[:space:]]Girls[[:space:]]Attack.mp4 filter=lfs diff=lfs merge=lfs -text
+17.BangBros.Halloween[[:space:]]Special[[:space:]]With[[:space:]]A[[:space:]]Threesome.mih16199.mp4 filter=lfs diff=lfs merge=lfs -text
+17.Clips4Sale.Cory's[[:space:]]Super[[:space:]]Heroine[[:space:]]Adventures.Super[[:space:]]Spy[[:space:]]vs[[:space:]]Super[[:space:]]Villain.mp4 filter=lfs diff=lfs merge=lfs -text
+17.DevilsFilm.Mommy[[:space:]]Likes[[:space:]]To[[:space:]]Watch[[:space:]]2.mp4 filter=lfs diff=lfs merge=lfs -text
+18.LesbianX.Anal[[:space:]]Gaping[[:space:]]Slumber[[:space:]]Party.mp4 filter=lfs diff=lfs merge=lfs -text
+17.LoveHerFeet.Multiplayer.mp4 filter=lfs diff=lfs merge=lfs -text
+18.PureTaboo.Trailer[[:space:]]Park[[:space:]]Taboo[[:space:]]3.mp4 filter=lfs diff=lfs merge=lfs -text
+18.PervCity.Tiny[[:space:]]Blonde[[:space:]]Teen[[:space:]]Kenzie[[:space:]]Reeves[[:space:]]Has[[:space:]]A[[:space:]]Big[[:space:]]Orgasm.mp4 filter=lfs diff=lfs merge=lfs -text
+18.GirlsWay.I[[:space:]]Love[[:space:]]You[[:space:]]Both.mp4 filter=lfs diff=lfs merge=lfs -text
+19.Deeper.Dare[[:space:]]You.101231.mp4 filter=lfs diff=lfs merge=lfs -text
+17.Mofos.Spinner[[:space:]]Sucks[[:space:]]Cock[[:space:]]for[[:space:]]Fame.mp4 filter=lfs diff=lfs merge=lfs -text
+18.Mofos.A[[:space:]]Sneaky[[:space:]]Threesome[[:space:]]Situation.mp4 filter=lfs diff=lfs merge=lfs -text
17.BangBros.Halloween Special With A Threesome.mih16199.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:884e879b96ec030cc8a14e1e7a32a9f65d39a85354ad2bb21d188ab04e278f5b
+size 2422708123
17.BangBros.Stalking Pussy.bbe16045.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f3ea68e1a0c05b5018c53f2a326f7157bac8c8090d065bac6cc2ef6cf0ba19a
+size 1934457211
17.Clips4Sale.Cory's Super Heroine Adventures.Super Spy vs Super Villain.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43034cc231bc539dc50693ec5870ac2257d0b6a10b6ae17599217010be5f6fb9
+size 870873298
17.DevilsFilm.Mommy Likes To Watch 2.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7e298a4ac6d03b2a47ae94ff40e28c23b63971025a6b3ceafe2589c1df0b07e
+size 648854235
17.DigitalPlayground.When College Girls Attack.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36f85fa2a830fafa0f9c8db488c6ab9747667053d3fef0c386e94775a0fbac54
+size 1377442258
17.EvilAngel.Porn Pro Drills 20YO Rim Jobber Kenzie.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8b436c5e022e8202b69e9979f447148453d79d42a678f83945b7d23bc351df7
+size 872243786
17.GirlsWay.Biology Exam.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c23a3b48e46e4d430fcb18bc8bf09eb89528b306ba3a6cf52b04755649feb210
+size 1592606077
17.LoveHerFeet.Multiplayer.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8613c7f6f11351f522a5bf70620e551c052c4792ce570657c66280b28c867411
+size 3093689738
17.Mofos.Spinner Sucks Cock for Fame.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6fc2687e483e90c4b6f4e4e54116c3c51d8e5191959ff42d58c195fabfff1231
+size 3241790517
17.NewSensations.Sexy Little Sister Kenzie Wants To Do It.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9bb7a31a09c770db451f771fc31bc2dfbe4a1a52a38732ba2d89af3173dd7ff
+size 3461941399
18.Brazzers.Selfies With The Dean.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c49d7ad91b9dde0e1f7ecbbee569b8097576f3cc2309920fcb881b5c39b44281
+size 1276373412
18.BurningAngel.Human Pinata.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a4885a26346a2c582ff867896d250cc98fe001bf4c45174a00173071256ecbf
+size 1038731154
18.CherryPimps.Wild Girls Kenzie Reeves And Vina Sky LIVE.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d1cb8e43eb18a4dfc7af97ff4f6a863d75b407021068ecf3fab86eab1913742
+size 2245370403
18.Down The Hatch 29.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e7d0fea1b35b4b04cef496a03c9f23dcb6c0e3c6565cc5f1d59994fb8169d4e
+size 392756777
18.GirlsWay.I Love You Both.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca0736f08f5d81d6c6e66c992593094c159d798435cbbd692a3c1eaa51113837
+size 1980038412
18.GirlsWay.I Love You Too.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:896db7f0a41b9c1d84d06aa92f5496bcc6f6e14a4e0376abeb340344f4e6c2d1
+size 2152267815
18.GirlsWay.Peer Pressure - The Sharing Sisters.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0547799d4e8c871884e338174dc002df3ec5946f54fe6353a1d07ffd1e17136
+size 2978343703
18.LesbianX.Anal Gaping Slumber Party.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33336790c8d0c09d9906bb51ae56f221d9225977573f3d592568a4acb6eb2b91
+size 1370767697
18.MissaX.In Love With Daddy.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7168383cafc414bfbf65c11d51a2cfbd1cedf81d187007c29987e440093968dd
+size 1567924147
18.Mofos.A Sneaky Threesome Situation.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9ac5c78b37130309d6611d6df4a3a682b9be19def52c46ba8c6affc88b49ced
+size 2553280872
18.Mofos.Teen Tastes Horny Stepmom's Pussy.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d1164cdc046dc7460372352ed5926b3aea7de4f672887f3b18a624004155b7b
+size 3301988676
18.Nubiles-Porn.Spring Break Lake Powell 1.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c4a91615484fd7a480576e77ce80ec4bea136be0a8b972951064b3c9cf34b1f
+size 1786930517
18.PervCity.Tiny Blonde Teen Kenzie Reeves Has A Big Orgasm.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:165f672238f896968ddddb4e96c28db32f373147b590fd8b3db67817b8371292
+size 944538607
18.PornFidelity.Losing It.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5e73aed9043d81b40c137869eeb68d81ab7a45d72df52b3809f4783a885eef5
+size 1017159752
18.PropertySex.VixenX.Been a While.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83995f5915320416fb2dea76cca22aa7b9c2cd53685bc8d7ddfc259a6c1097c1
+size 3520856768
18.PureTaboo.Trailer Park Taboo 3.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2d399deb76ce6f713f9a9f1af86701fc6777a5d628119a23666819e62dd7510
+size 1237153374
18.RealityKings.Social Proof.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:009864a80f69aa32706b2277dde75504ed0eb4dfb56a05721a4bbcb21f957f13
+size 3091314409
19.Analized.Is The Ultimate Teenage Anal Slut.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc240588b4f60a7542ef89fa2a55a85d91aeb6c0afcb7fed8c5e94965652aecc
+size 2621108271
19.ArchangelVideo.My Pussy Is Dripping.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8f9393e755d2618e2bc21c8617a0ab223ebb78f7dbf7c642d9cdd479b5670fe
+size 1916687999
19.BlowPass.Kenzie Loves Cock.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62c78684762704b653ed11810e6d9522fe196d95fe576049ebc42fd3dd5cad21
+size 2174866312
19.Deeper.Dare You.101231.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3564ff804379de6fda587031bdb9bf565f656fd08dec1b36b1081c12aafac10f
+size 3373955368
19.DogFartNetwork.BlackMeatWhiteFeet.And Big Black Cock.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21b54744365838fda8d13819af758b20a72cd105c8772aa3f169b17117bcf850
+size 3124523333
19.ElegantAngel.It'S A Family Thing 3.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10c8f94415bc763c56e16ca16862386c01aaa4fe597e4ed6a2b0a518594671bf
+size 604433859
venv/lib/python3.12/site-packages/filelock-3.18.0.dist-info/licenses/LICENSE
ADDED
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org>
venv/lib/python3.12/site-packages/fsspec/__init__.py
ADDED
@@ -0,0 +1,71 @@
+from . import caching
+from ._version import __version__  # noqa: F401
+from .callbacks import Callback
+from .compression import available_compressions
+from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs
+from .exceptions import FSTimeoutError
+from .mapping import FSMap, get_mapper
+from .registry import (
+    available_protocols,
+    filesystem,
+    get_filesystem_class,
+    register_implementation,
+    registry,
+)
+from .spec import AbstractFileSystem
+
+__all__ = [
+    "AbstractFileSystem",
+    "FSTimeoutError",
+    "FSMap",
+    "filesystem",
+    "register_implementation",
+    "get_filesystem_class",
+    "get_fs_token_paths",
+    "get_mapper",
+    "open",
+    "open_files",
+    "open_local",
+    "registry",
+    "caching",
+    "Callback",
+    "available_protocols",
+    "available_compressions",
+    "url_to_fs",
+]
+
+
+def process_entries():
+    try:
+        from importlib.metadata import entry_points
+    except ImportError:
+        return
+    if entry_points is not None:
+        try:
+            eps = entry_points()
+        except TypeError:
+            pass  # importlib-metadata < 0.8
+        else:
+            if hasattr(eps, "select"):  # Python 3.10+ / importlib_metadata >= 3.9.0
+                specs = eps.select(group="fsspec.specs")
+            else:
+                specs = eps.get("fsspec.specs", [])
+            registered_names = {}
+            for spec in specs:
+                err_msg = f"Unable to load filesystem from {spec}"
+                name = spec.name
+                if name in registered_names:
+                    continue
+                registered_names[name] = True
+                register_implementation(
+                    name,
+                    spec.value.replace(":", "."),
+                    errtxt=err_msg,
+                    # We take our implementations as the ones to overload with if
+                    # for some reason we encounter some, may be the same, already
+                    # registered
+                    clobber=True,
+                )
+
+
+process_entries()
venv/lib/python3.12/site-packages/fsspec/_version.py
ADDED
@@ -0,0 +1,21 @@
+# file generated by setuptools-scm
+# don't change, don't track in version control
+
+__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
+
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple
+    from typing import Union
+
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+else:
+    VERSION_TUPLE = object
+
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+
+__version__ = version = '2025.5.1'
+__version_tuple__ = version_tuple = (2025, 5, 1)
venv/lib/python3.12/site-packages/fsspec/archive.py
ADDED
@@ -0,0 +1,75 @@
+import operator
+
+from fsspec import AbstractFileSystem
+from fsspec.utils import tokenize
+
+
+class AbstractArchiveFileSystem(AbstractFileSystem):
+    """
+    A generic superclass for implementing Archive-based filesystems.
+
+    Currently, it is shared amongst
+    :class:`~fsspec.implementations.zip.ZipFileSystem`,
+    :class:`~fsspec.implementations.libarchive.LibArchiveFileSystem` and
+    :class:`~fsspec.implementations.tar.TarFileSystem`.
+    """
+
+    def __str__(self):
+        return f"<Archive-like object {type(self).__name__} at {id(self)}>"
+
+    __repr__ = __str__
+
+    def ukey(self, path):
+        return tokenize(path, self.fo, self.protocol)
+
+    def _all_dirnames(self, paths):
+        """Returns *all* directory names for each path in paths, including intermediate
+        ones.
+
+        Parameters
+        ----------
+        paths: Iterable of path strings
+        """
+        if len(paths) == 0:
+            return set()
+
+        dirnames = {self._parent(path) for path in paths} - {self.root_marker}
+        return dirnames | self._all_dirnames(dirnames)
+
+    def info(self, path, **kwargs):
+        self._get_dirs()
+        path = self._strip_protocol(path)
+        if path in {"", "/"} and self.dir_cache:
+            return {"name": "", "type": "directory", "size": 0}
+        if path in self.dir_cache:
+            return self.dir_cache[path]
+        elif path + "/" in self.dir_cache:
+            return self.dir_cache[path + "/"]
+        else:
+            raise FileNotFoundError(path)
+
+    def ls(self, path, detail=True, **kwargs):
+        self._get_dirs()
+        paths = {}
+        for p, f in self.dir_cache.items():
+            p = p.rstrip("/")
+            if "/" in p:
+                root = p.rsplit("/", 1)[0]
+            else:
+                root = ""
+            if root == path.rstrip("/"):
+                paths[p] = f
+            elif all(
+                (a == b)
+                for a, b in zip(path.split("/"), [""] + p.strip("/").split("/"))
+            ):
+                # root directory entry
+                ppath = p.rstrip("/").split("/", 1)[0]
+                if ppath not in paths:
+                    out = {"name": ppath, "size": 0, "type": "directory"}
+                    paths[ppath] = out
+        if detail:
+            out = sorted(paths.values(), key=operator.itemgetter("name"))
+            return out
+        else:
+            return sorted(paths)
venv/lib/python3.12/site-packages/fsspec/asyn.py
ADDED
@@ -0,0 +1,1110 @@
+import asyncio
+import asyncio.events
+import functools
+import inspect
+import io
+import numbers
+import os
+import re
+import threading
+from contextlib import contextmanager
+from glob import has_magic
+from typing import TYPE_CHECKING, Iterable
+
+from .callbacks import DEFAULT_CALLBACK
+from .exceptions import FSTimeoutError
+from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
+from .spec import AbstractBufferedFile, AbstractFileSystem
+from .utils import glob_translate, is_exception, other_paths
+
+private = re.compile("_[^_]")
+iothread = [None]  # dedicated fsspec IO thread
+loop = [None]  # global event loop for any non-async instance
+_lock = None  # global lock placeholder
+get_running_loop = asyncio.get_running_loop
+
+
+def get_lock():
+    """Allocate or return a threading lock.
+
+    The lock is allocated on first use to allow setting one lock per forked process.
+    """
+    global _lock
+    if not _lock:
+        _lock = threading.Lock()
+    return _lock
+
+
+def reset_lock():
+    """Reset the global lock.
+
+    This should be called only on the init of a forked process to reset the lock to
+    None, enabling the new forked process to get a new lock.
+    """
+    global _lock
+
+    iothread[0] = None
+    loop[0] = None
+    _lock = None
+
+
+async def _runner(event, coro, result, timeout=None):
+    timeout = timeout if timeout else None  # convert 0 or 0.0 to None
+    if timeout is not None:
+        coro = asyncio.wait_for(coro, timeout=timeout)
+    try:
+        result[0] = await coro
+    except Exception as ex:
+        result[0] = ex
+    finally:
+        event.set()
+
+
+def sync(loop, func, *args, timeout=None, **kwargs):
+    """
+    Make loop run coroutine until it returns. Runs in other thread
+
+    Examples
+    --------
+    >>> fsspec.asyn.sync(fsspec.asyn.get_loop(), func, *args,
+                         timeout=timeout, **kwargs)
+    """
+    timeout = timeout if timeout else None  # convert 0 or 0.0 to None
+    # NB: if the loop is not running *yet*, it is OK to submit work
+    # and we will wait for it
+    if loop is None or loop.is_closed():
+        raise RuntimeError("Loop is not running")
+    try:
+        loop0 = asyncio.events.get_running_loop()
+        if loop0 is loop:
+            raise NotImplementedError("Calling sync() from within a running loop")
+    except NotImplementedError:
+        raise
+    except RuntimeError:
+        pass
+    coro = func(*args, **kwargs)
+    result = [None]
+    event = threading.Event()
+    asyncio.run_coroutine_threadsafe(_runner(event, coro, result, timeout), loop)
+    while True:
+        # this loops allows thread to get interrupted
+        if event.wait(1):
+            break
+        if timeout is not None:
+            timeout -= 1
+            if timeout < 0:
+                raise FSTimeoutError
+
+    return_result = result[0]
+    if isinstance(return_result, asyncio.TimeoutError):
+        # suppress asyncio.TimeoutError, raise FSTimeoutError
+        raise FSTimeoutError from return_result
+    elif isinstance(return_result, BaseException):
+        raise return_result
+    else:
+        return return_result
+
+
+def sync_wrapper(func, obj=None):
+    """Given a function, make so can be called in blocking contexts
+
+    Leave obj=None if defining within a class. Pass the instance if attaching
+    as an attribute of the instance.
+    """
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        self = obj or args[0]
+        return sync(self.loop, func, *args, **kwargs)
+
+    return wrapper
+
+
+@contextmanager
+def _selector_policy():
+    original_policy = asyncio.get_event_loop_policy()
+    try:
+        if os.name == "nt" and hasattr(asyncio, "WindowsSelectorEventLoopPolicy"):
+            asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
+
+        yield
+    finally:
+        asyncio.set_event_loop_policy(original_policy)
+
+
+def get_loop():
+    """Create or return the default fsspec IO loop
+
+    The loop will be running on a separate thread.
+    """
+    if loop[0] is None:
+        with get_lock():
+            # repeat the check just in case the loop got filled between the
+            # previous two calls from another thread
+            if loop[0] is None:
+                with _selector_policy():
+                    loop[0] = asyncio.new_event_loop()
+                th = threading.Thread(target=loop[0].run_forever, name="fsspecIO")
+                th.daemon = True
+                th.start()
+                iothread[0] = th
+    return loop[0]
+
+
+def reset_after_fork():
+    global lock
+    loop[0] = None
+    iothread[0] = None
+    lock = None
+
+
+if hasattr(os, "register_at_fork"):
+    # should be posix; this will do nothing for spawn or forkserver subprocesses
+    os.register_at_fork(after_in_child=reset_after_fork)
+
+
+if TYPE_CHECKING:
+    import resource
+
+    ResourceError = resource.error
+else:
+    try:
+        import resource
+    except ImportError:
+        resource = None
+        ResourceError = OSError
+    else:
+        ResourceError = getattr(resource, "error", OSError)
+
+_DEFAULT_BATCH_SIZE = 128
+_NOFILES_DEFAULT_BATCH_SIZE = 1280
+
+
+def _get_batch_size(nofiles=False):
+    from fsspec.config import conf
+
+    if nofiles:
+        if "nofiles_gather_batch_size" in conf:
+            return conf["nofiles_gather_batch_size"]
+    else:
+        if "gather_batch_size" in conf:
+            return conf["gather_batch_size"]
+    if nofiles:
+        return _NOFILES_DEFAULT_BATCH_SIZE
+    if resource is None:
+        return _DEFAULT_BATCH_SIZE
+
+    try:
+        soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
+    except (ImportError, ValueError, ResourceError):
+        return _DEFAULT_BATCH_SIZE
+
+    if soft_limit == resource.RLIM_INFINITY:
+        return -1
+    else:
+        return soft_limit // 8
+
+
+def running_async() -> bool:
+    """Being executed by an event loop?"""
+    try:
+        asyncio.get_running_loop()
+        return True
+    except RuntimeError:
+        return False
+
+
+async def _run_coros_in_chunks(
+    coros,
+    batch_size=None,
+    callback=DEFAULT_CALLBACK,
+    timeout=None,
+    return_exceptions=False,
+    nofiles=False,
+):
+    """Run the given coroutines in chunks.
+
+    Parameters
+    ----------
+    coros: list of coroutines to run
+    batch_size: int or None
+        Number of coroutines to submit/wait on simultaneously.
+        If -1, then it will not be any throttling. If
+        None, it will be inferred from _get_batch_size()
+    callback: fsspec.callbacks.Callback instance
+        Gets a relative_update when each coroutine completes
+    timeout: number or None
+        If given, each coroutine times out after this time. Note that, since
+        there are multiple batches, the total run time of this function will in
+        general be longer
+    return_exceptions: bool
+        Same meaning as in asyncio.gather
+    nofiles: bool
+        If inferring the batch_size, does this operation involve local files?
+        If yes, you normally expect smaller batches.
+    """
+
+    if batch_size is None:
+        batch_size = _get_batch_size(nofiles=nofiles)
+
+    if batch_size == -1:
+        batch_size = len(coros)
+
+    assert batch_size > 0
+
+    async def _run_coro(coro, i):
+        try:
+            return await asyncio.wait_for(coro, timeout=timeout), i
+        except Exception as e:
+            if not return_exceptions:
+                raise
+            return e, i
+        finally:
+            callback.relative_update(1)
+
+    i = 0
+    n = len(coros)
+    results = [None] * n
+    pending = set()
+
+    while pending or i < n:
+        while len(pending) < batch_size and i < n:
+            pending.add(asyncio.ensure_future(_run_coro(coros[i], i)))
+            i += 1
+
+        if not pending:
+            break
+
+        done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
+        while done:
+            result, k = await done.pop()
+            results[k] = result
+
+    return results
+
+
+# these methods should be implemented as async by any async-able backend
+async_methods = [
+    "_ls",
+    "_cat_file",
+    "_get_file",
+    "_put_file",
+    "_rm_file",
+    "_cp_file",
+    "_pipe_file",
+    "_expand_path",
+    "_info",
+    "_isfile",
+    "_isdir",
+    "_exists",
+    "_walk",
+    "_glob",
+    "_find",
+    "_du",
+    "_size",
+    "_mkdir",
+    "_makedirs",
+]
+
+
+class AsyncFileSystem(AbstractFileSystem):
+    """Async file operations, default implementations
+
+    Passes bulk operations to asyncio.gather for concurrent operation.
+
+    Implementations that have concurrent batch operations and/or async methods
+    should inherit from this class instead of AbstractFileSystem. Docstrings are
+    copied from the un-underscored method in AbstractFileSystem, if not given.
+    """
+
+    # note that methods do not have docstring here; they will be copied
+    # for _* methods and inferred for overridden methods.
+
+    async_impl = True
+    mirror_sync_methods = True
+    disable_throttling = False
+
+    def __init__(self, *args, asynchronous=False, loop=None, batch_size=None, **kwargs):
+        self.asynchronous = asynchronous
+        self._pid = os.getpid()
+        if not asynchronous:
+            self._loop = loop or get_loop()
+        else:
+            self._loop = None
+        self.batch_size = batch_size
+        super().__init__(*args, **kwargs)
+
+    @property
+    def loop(self):
+        if self._pid != os.getpid():
+            raise RuntimeError("This class is not fork-safe")
+        return self._loop
+
+    async def _rm_file(self, path, **kwargs):
+        raise NotImplementedError
+
+    async def _rm(self, path, recursive=False, batch_size=None, **kwargs):
+        # TODO: implement on_error
+        batch_size = batch_size or self.batch_size
+        path = await self._expand_path(path, recursive=recursive)
+        return await _run_coros_in_chunks(
+            [self._rm_file(p, **kwargs) for p in reversed(path)],
+            batch_size=batch_size,
+            nofiles=True,
+        )
+
+    async def _cp_file(self, path1, path2, **kwargs):
+        raise NotImplementedError
+
+    async def _mv_file(self, path1, path2):
+        await self._cp_file(path1, path2)
+        await self._rm_file(path1)
+
+    async def _copy(
+        self,
+        path1,
+        path2,
+        recursive=False,
+        on_error=None,
+        maxdepth=None,
+        batch_size=None,
+        **kwargs,
+    ):
+        if on_error is None and recursive:
+            on_error = "ignore"
+        elif on_error is None:
+            on_error = "raise"
+
+        if isinstance(path1, list) and isinstance(path2, list):
+            # No need to expand paths when both source and destination
+            # are provided as lists
+            paths1 = path1
+            paths2 = path2
+        else:
+            source_is_str = isinstance(path1, str)
+            paths1 = await self._expand_path(
+                path1, maxdepth=maxdepth, recursive=recursive
+            )
+            if source_is_str and (not recursive or maxdepth is not None):
+                # Non-recursive glob does not copy directories
+                paths1 = [
+                    p for p in paths1 if not (trailing_sep(p) or await self._isdir(p))
+                ]
+                if not paths1:
+                    return
+
+            source_is_file = len(paths1) == 1
+            dest_is_dir = isinstance(path2, str) and (
+                trailing_sep(path2) or await self._isdir(path2)
+            )
+
+            exists = source_is_str and (
+                (has_magic(path1) and source_is_file)
+                or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
+            )
+            paths2 = other_paths(
+                paths1,
+                path2,
+                exists=exists,
+                flatten=not source_is_str,
+            )
+
+        batch_size = batch_size or self.batch_size
+        coros = [self._cp_file(p1, p2, **kwargs) for p1, p2 in zip(paths1, paths2)]
+        result = await _run_coros_in_chunks(
+            coros, batch_size=batch_size, return_exceptions=True, nofiles=True
+        )
+
+        for ex in filter(is_exception, result):
+            if on_error == "ignore" and isinstance(ex, FileNotFoundError):
+                continue
+            raise ex
+
+    async def _pipe_file(self, path, value, mode="overwrite", **kwargs):
+        raise NotImplementedError
+
+    async def _pipe(self, path, value=None, batch_size=None, **kwargs):
+        if isinstance(path, str):
+            path = {path: value}
+        batch_size = batch_size or self.batch_size
+        return await _run_coros_in_chunks(
+            [self._pipe_file(k, v, **kwargs) for k, v in path.items()],
+            batch_size=batch_size,
+            nofiles=True,
+        )
+
+    async def _process_limits(self, url, start, end):
+        """Helper for "Range"-based _cat_file"""
+        size = None
+        suff = False
+        if start is not None and start < 0:
+            # if start is negative and end None, end is the "suffix length"
+            if end is None:
+                end = -start
+                start = ""
+                suff = True
+            else:
+                size = size or (await self._info(url))["size"]
+                start = size + start
+        elif start is None:
+            start = 0
+        if not suff:
+            if end is not None and end < 0:
+                if start is not None:
+                    size = size or (await self._info(url))["size"]
+                    end = size + end
+            elif end is None:
+                end = ""
+            if isinstance(end, numbers.Integral):
+                end -= 1  # bytes range is inclusive
+        return f"bytes={start}-{end}"
+
+    async def _cat_file(self, path, start=None, end=None, **kwargs):
+        raise NotImplementedError
+
+    async def _cat(
+        self, path, recursive=False, on_error="raise", batch_size=None, **kwargs
+    ):
+        paths = await self._expand_path(path, recursive=recursive)
+        coros = [self._cat_file(path, **kwargs) for path in paths]
+        batch_size = batch_size or self.batch_size
+        out = await _run_coros_in_chunks(
+            coros, batch_size=batch_size, nofiles=True, return_exceptions=True
+        )
+        if on_error == "raise":
+            ex = next(filter(is_exception, out), False)
+            if ex:
+                raise ex
+        if (
+            len(paths) > 1
+            or isinstance(path, list)
+            or paths[0] != self._strip_protocol(path)
+        ):
+            return {
+                k: v
+                for k, v in zip(paths, out)
+                if on_error != "omit" or not is_exception(v)
+            }
+        else:
+            return out[0]
+
+    async def _cat_ranges(
+        self,
+        paths,
+        starts,
+        ends,
+        max_gap=None,
+        batch_size=None,
+        on_error="return",
+        **kwargs,
+    ):
+        """Get the contents of byte ranges from one or more files
+
+        Parameters
+        ----------
+        paths: list
+            A list of of filepaths on this filesystems
+        starts, ends: int or list
+            Bytes limits of the read. If using a single int, the same value will be
+            used to read all the specified files.
+        """
+        # TODO: on_error
+        if max_gap is not None:
+            # use utils.merge_offset_ranges
+            raise NotImplementedError
+        if not isinstance(paths, list):
+            raise TypeError
+        if not isinstance(starts, Iterable):
+            starts = [starts] * len(paths)
+        if not isinstance(ends, Iterable):
+            ends = [ends] * len(paths)
+        if len(starts) != len(paths) or len(ends) != len(paths):
+            raise ValueError
+        coros = [
+            self._cat_file(p, start=s, end=e, **kwargs)
+            for p, s, e in zip(paths, starts, ends)
+        ]
+        batch_size = batch_size or self.batch_size
+        return await _run_coros_in_chunks(
+            coros, batch_size=batch_size, nofiles=True, return_exceptions=True
+        )
+
+    async def _put_file(self, lpath, rpath, mode="overwrite", **kwargs):
+        raise NotImplementedError
+
+    async def _put(
+        self,
+        lpath,
+        rpath,
+        recursive=False,
+        callback=DEFAULT_CALLBACK,
+        batch_size=None,
+        maxdepth=None,
+        **kwargs,
+    ):
+        """Copy file(s) from local.
+
+        Copies a specific file or tree of files (if recursive=True). If rpath
+        ends with a "/", it will be assumed to be a directory, and target files
+        will go within.
+
+        The put_file method will be called concurrently on a batch of files. The
+        batch_size option can configure the amount of futures that can be executed
+        at the same time. If it is -1, then all the files will be uploaded concurrently.
+        The default can be set for this instance by passing "batch_size" in the
+        constructor, or for all instances by setting the "gather_batch_size" key
+        in ``fsspec.config.conf``, falling back to 1/8th of the system limit .
+        """
+        if isinstance(lpath, list) and isinstance(rpath, list):
+            # No need to expand paths when both source and destination
+            # are provided as lists
+            rpaths = rpath
+            lpaths = lpath
+        else:
+            source_is_str = isinstance(lpath, str)
+            if source_is_str:
+                lpath = make_path_posix(lpath)
+            fs = LocalFileSystem()
+            lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
+            if source_is_str and (not recursive or maxdepth is not None):
+                # Non-recursive glob does not copy directories
+                lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
+                if not lpaths:
+                    return
+
+            source_is_file = len(lpaths) == 1
+            dest_is_dir = isinstance(rpath, str) and (
+                trailing_sep(rpath) or await self._isdir(rpath)
+            )
+
+            rpath = self._strip_protocol(rpath)
+            exists = source_is_str and (
+                (has_magic(lpath) and source_is_file)
+                or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
+            )
+            rpaths = other_paths(
+                lpaths,
+                rpath,
+                exists=exists,
+                flatten=not source_is_str,
+            )
+
+        is_dir = {l: os.path.isdir(l) for l in lpaths}
+        rdirs = [r for l, r in zip(lpaths, rpaths) if is_dir[l]]
+        file_pairs = [(l, r) for l, r in zip(lpaths, rpaths) if not is_dir[l]]
+
+        await asyncio.gather(*[self._makedirs(d, exist_ok=True) for d in rdirs])
+        batch_size = batch_size or self.batch_size
+
+        coros = []
+        callback.set_size(len(file_pairs))
+        for lfile, rfile in file_pairs:
+            put_file = callback.branch_coro(self._put_file)
+            coros.append(put_file(lfile, rfile, **kwargs))
+
+        return await _run_coros_in_chunks(
+            coros, batch_size=batch_size, callback=callback
+        )
+
+    async def _get_file(self, rpath, lpath, **kwargs):
+        raise NotImplementedError
+
+    async def _get(
+        self,
+        rpath,
+        lpath,
+        recursive=False,
+        callback=DEFAULT_CALLBACK,
+        maxdepth=None,
+        **kwargs,
+    ):
+        """Copy file(s) to local.
+
+        Copies a specific file or tree of files (if recursive=True). If lpath
+        ends with a "/", it will be assumed to be a directory, and target files
+        will go within. Can submit a list of paths, which may be glob-patterns
+        and will be expanded.
+
+        The get_file method will be called concurrently on a batch of files. The
+        batch_size option can configure the amount of futures that can be executed
+        at the same time. If it is -1, then all the files will be uploaded concurrently.
+        The default can be set for this instance by passing "batch_size" in the
+        constructor, or for all instances by setting the "gather_batch_size" key
+        in ``fsspec.config.conf``, falling back to 1/8th of the system limit .
+        """
+        if isinstance(lpath, list) and isinstance(rpath, list):
+            # No need to expand paths when both source and destination
+            # are provided as lists
+            rpaths = rpath
+            lpaths = lpath
+        else:
+            source_is_str = isinstance(rpath, str)
+            # First check for rpath trailing slash as _strip_protocol removes it.
+            source_not_trailing_sep = source_is_str and not trailing_sep(rpath)
+            rpath = self._strip_protocol(rpath)
+            rpaths = await self._expand_path(
+                rpath, recursive=recursive, maxdepth=maxdepth
+            )
+            if source_is_str and (not recursive or maxdepth is not None):
+                # Non-recursive glob does not copy directories
+                rpaths = [
+                    p for p in rpaths if not (trailing_sep(p) or await self._isdir(p))
+                ]
+                if not rpaths:
+                    return
+
+            lpath = make_path_posix(lpath)
+            source_is_file = len(rpaths) == 1
+            dest_is_dir = isinstance(lpath, str) and (
+                trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
+            )
+
+            exists = source_is_str and (
+                (has_magic(rpath) and source_is_file)
+                or (not has_magic(rpath) and dest_is_dir and source_not_trailing_sep)
+            )
+            lpaths = other_paths(
+                rpaths,
+                lpath,
+                exists=exists,
+                flatten=not source_is_str,
+            )
+
+        [os.makedirs(os.path.dirname(lp), exist_ok=True) for lp in lpaths]
+        batch_size = kwargs.pop("batch_size", self.batch_size)
+
+        coros = []
+        callback.set_size(len(lpaths))
+        for lpath, rpath in zip(lpaths, rpaths):
+            get_file = callback.branch_coro(self._get_file)
+            coros.append(get_file(rpath, lpath, **kwargs))
+        return await _run_coros_in_chunks(
+            coros, batch_size=batch_size, callback=callback
+        )
+
+    async def _isfile(self, path):
+        try:
+            return (await self._info(path))["type"] == "file"
+        except:  # noqa: E722
+            return False
+
+    async def _isdir(self, path):
+        try:
+            return (await self._info(path))["type"] == "directory"
+        except OSError:
+            return False
+
+    async def _size(self, path):
+        return (await self._info(path)).get("size", None)
+
+    async def _sizes(self, paths, batch_size=None):
+        batch_size = batch_size or self.batch_size
+        return await _run_coros_in_chunks(
+            [self._size(p) for p in paths], batch_size=batch_size
+        )
+
+    async def _exists(self, path, **kwargs):
+        try:
+            await self._info(path, **kwargs)
+            return True
+        except FileNotFoundError:
+            return False
+
+    async def _info(self, path, **kwargs):
+        raise NotImplementedError
+
+    async def _ls(self, path, detail=True, **kwargs):
+        raise NotImplementedError
+
+    async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs):
+        if maxdepth is not None and maxdepth < 1:
+            raise ValueError("maxdepth must be at least 1")
+
+        path = self._strip_protocol(path)
+        full_dirs = {}
+        dirs = {}
+        files = {}
+
+        detail = kwargs.pop("detail", False)
+        try:
+            listing = await self._ls(path, detail=True, **kwargs)
+        except (FileNotFoundError, OSError) as e:
+            if on_error == "raise":
+                raise
+            elif callable(on_error):
+                on_error(e)
+            if detail:
+                yield path, {}, {}
+            else:
+                yield path, [], []
+            return
+
+        for info in listing:
+            # each info name must be at least [path]/part , but here
+            # we check also for names like [path]/part/
+            pathname = info["name"].rstrip("/")
+            name = pathname.rsplit("/", 1)[-1]
+            if info["type"] == "directory" and pathname != path:
+                # do not include "self" path
+                full_dirs[name] = pathname
+                dirs[name] = info
+            elif pathname == path:
+                # file-like with same name as give path
+                files[""] = info
+            else:
+                files[name] = info
+
+        if detail:
+            yield path, dirs, files
+        else:
+            yield path, list(dirs), list(files)
+
+        if maxdepth is not None:
+            maxdepth -= 1
+            if maxdepth < 1:
+                return
+
+        for d in dirs:
+            async for _ in self._walk(
+                full_dirs[d], maxdepth=maxdepth, detail=detail, **kwargs
+            ):
+                yield _
+
+    async def _glob(self, path, maxdepth=None, **kwargs):
+        if maxdepth is not None and maxdepth < 1:
+            raise ValueError("maxdepth must be at least 1")
+
+        import re
+
+        seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
+        ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
+        path = self._strip_protocol(path)
+        append_slash_to_dirname = ends_with_sep or path.endswith(
+            tuple(sep + "**" for sep in seps)
+        )
+        idx_star = path.find("*") if path.find("*") >= 0 else len(path)
+        idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
+        idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
+
+        min_idx = min(idx_star, idx_qmark, idx_brace)
+
+        detail = kwargs.pop("detail", False)
+
+        if not has_magic(path):
+            if await self._exists(path, **kwargs):
+                if not detail:
+                    return [path]
+                else:
+                    return {path: await self._info(path, **kwargs)}
+            else:
+                if not detail:
+                    return []  # glob of non-existent returns empty
+                else:
+                    return {}
+        elif "/" in path[:min_idx]:
+            min_idx = path[:min_idx].rindex("/")
+            root = path[: min_idx + 1]
+            depth = path[min_idx + 1 :].count("/") + 1
+        else:
+            root = ""
+            depth = path[min_idx + 1 :].count("/") + 1
+
+        if "**" in path:
+            if maxdepth is not None:
+                idx_double_stars = path.find("**")
+                depth_double_stars = path[idx_double_stars:].count("/") + 1
+                depth = depth - depth_double_stars + maxdepth
+            else:
+                depth = None
+
+        allpaths = await self._find(
+            root, maxdepth=depth, withdirs=True, detail=True, **kwargs
+        )
+
+        pattern = glob_translate(path + ("/" if ends_with_sep else ""))
+        pattern = re.compile(pattern)
+
+        out = {
+            p: info
+            for p, info in sorted(allpaths.items())
+            if pattern.match(
+                p + "/"
+                if append_slash_to_dirname and info["type"] == "directory"
+                else p
+            )
+        }
+
+        if detail:
+            return out
+        else:
+            return list(out)
+
+    async def _du(self, path, total=True, maxdepth=None, **kwargs):
+        sizes = {}
+        # async for?
+        for f in await self._find(path, maxdepth=maxdepth, **kwargs):
+            info = await self._info(f)
+            sizes[info["name"]] = info["size"]
+        if total:
+            return sum(sizes.values())
+        else:
+            return sizes
+
+    async def _find(self, path, maxdepth=None, withdirs=False, **kwargs):
+        path = self._strip_protocol(path)
+        out = {}
+        detail = kwargs.pop("detail", False)
+
+        # Add the root directory if withdirs is requested
+        # This is needed for posix glob compliance
+        if withdirs and path != "" and await self._isdir(path):
+            out[path] = await self._info(path)
+
+        # async for?
+        async for _, dirs, files in self._walk(path, maxdepth, detail=True, **kwargs):
+            if withdirs:
+                files.update(dirs)
+            out.update({info["name"]: info for name, info in files.items()})
+        if not out and (await self._isfile(path)):
+            # walk works on directories, but find should also return [path]
+            # when path happens to be a file
+            out[path] = {}
+        names = sorted(out)
+        if not detail:
+            return names
+        else:
+            return {name: out[name] for name in names}
+
+    async def _expand_path(self, path, recursive=False, maxdepth=None):
+        if maxdepth is not None and maxdepth < 1:
+            raise ValueError("maxdepth must be at least 1")
+
+        if isinstance(path, str):
+            out = await self._expand_path([path], recursive, maxdepth)
+        else:
+            out = set()
+            path = [self._strip_protocol(p) for p in path]
+            for p in path:  # can gather here
+                if has_magic(p):
+                    bit = set(await self._glob(p, maxdepth=maxdepth))
+                    out |= bit
+                    if recursive:
+                        # glob call above expanded one depth so if maxdepth is defined
+                        # then decrement it in expand_path call below. If it is zero
+                        # after decrementing then avoid expand_path call.
+                        if maxdepth is not None and maxdepth <= 1:
+                            continue
+                        out |= set(
+                            await self._expand_path(
+                                list(bit),
+                                recursive=recursive,
+                                maxdepth=maxdepth - 1 if maxdepth is not None else None,
+                            )
+                        )
+                    continue
+                elif recursive:
+                    rec = set(await self._find(p, maxdepth=maxdepth, withdirs=True))
+                    out |= rec
+                if p not in out and (recursive is False or (await self._exists(p))):
+                    # should only check once, for the root
+                    out.add(p)
+        if not out:
+            raise FileNotFoundError(path)
|
913 |
+
return sorted(out)
|
914 |
+
|
915 |
+
async def _mkdir(self, path, create_parents=True, **kwargs):
|
916 |
+
pass # not necessary to implement, may not have directories
|
917 |
+
|
918 |
+
async def _makedirs(self, path, exist_ok=False):
|
919 |
+
pass # not necessary to implement, may not have directories
|
920 |
+
|
921 |
+
async def open_async(self, path, mode="rb", **kwargs):
|
922 |
+
if "b" not in mode or kwargs.get("compression"):
|
923 |
+
raise ValueError
|
924 |
+
raise NotImplementedError
|
925 |
+
|
926 |
+
|
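The coroutines above are essentially the whole surface a backend has to supply; walking, globbing and the synchronous method names are all derived. A minimal sketch of such a subclass, assuming nothing beyond what is defined above: `DictFileSystem`, its `data` mapping and the sample path are invented for illustration, not part of fsspec.

```python
from fsspec.asyn import AsyncFileSystem


class DictFileSystem(AsyncFileSystem):  # hypothetical example class
    protocol = "dictfs"
    cachable = False  # avoid instance caching for this toy example

    def __init__(self, data=None, **kwargs):
        super().__init__(**kwargs)
        self.data = data or {}  # path -> bytes

    async def _info(self, path, **kwargs):
        path = self._strip_protocol(path)
        if path in self.data:
            return {"name": path, "size": len(self.data[path]), "type": "file"}
        raise FileNotFoundError(path)

    async def _ls(self, path, detail=True, **kwargs):
        infos = [await self._info(p) for p in sorted(self.data)]
        return infos if detail else [i["name"] for i in infos]


fs = DictFileSystem(data={"a/b.txt": b"hello"})
# the sync name is generated from _ls by mirror_sync_methods, defined below
print(fs.ls("", detail=False))
```

Because the instance was created with the default `asynchronous=False`, the sync wrappers are attached at construction time and the class never defines `ls` itself.
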
def mirror_sync_methods(obj):
    """Populate sync and async methods for obj

    For each method will create a sync version if the name refers to an async method
    (coroutine) and there is no override in the child class; will create an async
    method for the corresponding sync method if there is no implementation.

    Uses the methods specified in
    - async_methods: the set that an implementation is expected to provide
    - default_async_methods: that can be derived from their sync version in
      AbstractFileSystem
    - AsyncFileSystem: async-specific default coroutines
    """
    from fsspec import AbstractFileSystem

    for method in async_methods + dir(AsyncFileSystem):
        if not method.startswith("_"):
            continue
        smethod = method[1:]
        if private.match(method):
            isco = inspect.iscoroutinefunction(getattr(obj, method, None))
            unsync = getattr(getattr(obj, smethod, False), "__func__", None)
            is_default = unsync is getattr(AbstractFileSystem, smethod, "")
            if isco and is_default:
                mth = sync_wrapper(getattr(obj, method), obj=obj)
                setattr(obj, smethod, mth)
                if not mth.__doc__:
                    mth.__doc__ = getattr(
                        getattr(AbstractFileSystem, smethod, None), "__doc__", ""
                    )


class FSSpecCoroutineCancel(Exception):
    pass


def _dump_running_tasks(
    printout=True, cancel=True, exc=FSSpecCoroutineCancel, with_task=False
):
    import traceback

    tasks = [t for t in asyncio.tasks.all_tasks(loop[0]) if not t.done()]
    if printout:
        [task.print_stack() for task in tasks]
    out = [
        {
            "locals": task._coro.cr_frame.f_locals,
            "file": task._coro.cr_frame.f_code.co_filename,
            "firstline": task._coro.cr_frame.f_code.co_firstlineno,
            "linelo": task._coro.cr_frame.f_lineno,
            "stack": traceback.format_stack(task._coro.cr_frame),
            "task": task if with_task else None,
        }
        for task in tasks
    ]
    if cancel:
        for t in tasks:
            cbs = t._callbacks
            t.cancel()
            asyncio.futures.Future.set_exception(t, exc)
            asyncio.futures.Future.cancel(t)
            [cb[0](t) for cb in cbs]  # cancels any dependent concurrent.futures
            try:
                t._coro.throw(exc)  # exits coro, unless explicitly handled
            except exc:
                pass
    return out


class AbstractAsyncStreamedFile(AbstractBufferedFile):
    # no read buffering, and always auto-commit
    # TODO: readahead might still be useful here, but needs async version

    async def read(self, length=-1):
        """
        Return data from cache, or fetch pieces as necessary

        Parameters
        ----------
        length: int (-1)
            Number of bytes to read; if <0, all remaining bytes.
        """
        length = -1 if length is None else int(length)
        if self.mode != "rb":
            raise ValueError("File not in read mode")
        if length < 0:
            length = self.size - self.loc
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if length == 0:
            # don't even bother calling fetch
            return b""
        out = await self._fetch_range(self.loc, self.loc + length)
        self.loc += len(out)
        return out

    async def write(self, data):
        """
        Write data to buffer.

        Buffer only sent on flush() or if buffer is greater than
        or equal to blocksize.

        Parameters
        ----------
        data: bytes
            Set of bytes to be written.
        """
        if self.mode not in {"wb", "ab"}:
            raise ValueError("File not in write mode")
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if self.forced:
            raise ValueError("This file has been force-flushed, can only close")
        out = self.buffer.write(data)
        self.loc += out
        if self.buffer.tell() >= self.blocksize:
            await self.flush()
        return out

    async def close(self):
        """Close file

        Finalizes writes, discards cache
        """
        if getattr(self, "_unclosable", False):
            return
        if self.closed:
            return
        if self.mode == "rb":
            self.cache = None
        else:
            if not self.forced:
                await self.flush(force=True)

            if self.fs is not None:
                self.fs.invalidate_cache(self.path)
                self.fs.invalidate_cache(self.fs._parent(self.path))

        self.closed = True

    async def flush(self, force=False):
        if self.closed:
            raise ValueError("Flush on closed file")
        if force and self.forced:
            raise ValueError("Force flush cannot be called more than once")
        if force:
            self.forced = True

        if self.mode not in {"wb", "ab"}:
            # no-op to flush on read-mode
            return

        if not force and self.buffer.tell() < self.blocksize:
            # Defer write on small block
            return

        if self.offset is None:
            # Initialize a multipart upload
            self.offset = 0
            try:
                await self._initiate_upload()
            except:
                self.closed = True
                raise

        if await self._upload_chunk(final=force) is not False:
            self.offset += self.buffer.seek(0, 2)
            self.buffer = io.BytesIO()

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    async def _fetch_range(self, start, end):
        raise NotImplementedError

    async def _initiate_upload(self):
        pass

    async def _upload_chunk(self, final=False):
        raise NotImplementedError
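A sketch of how the streamed file is meant to be consumed, not fsspec's own API surface: `stream_first_kb` and `fs` are placeholders, and `fs` must be an `AsyncFileSystem` whose backend actually implements `open_async` (the base coroutine above only raises).

```python
async def stream_first_kb(fs, path):
    # __aenter__/__aexit__ above make the streamed file an async context
    # manager; close() flushes pending writes or drops the read cache
    async with await fs.open_async(path, "rb") as f:
        return await f.read(1024)
```

Since reads are unbuffered, each `read` maps directly onto one `_fetch_range` call against the backend.
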
venv/lib/python3.12/site-packages/fsspec/caching.py
ADDED
@@ -0,0 +1,1005 @@
from __future__ import annotations

import collections
import functools
import logging
import math
import os
import threading
import warnings
from concurrent.futures import Future, ThreadPoolExecutor
from itertools import groupby
from operator import itemgetter
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Generic,
    NamedTuple,
    Optional,
    OrderedDict,
    TypeVar,
)

if TYPE_CHECKING:
    import mmap

    from typing_extensions import ParamSpec

    P = ParamSpec("P")
else:
    P = TypeVar("P")

T = TypeVar("T")


logger = logging.getLogger("fsspec")

Fetcher = Callable[[int, int], bytes]  # Maps (start, end) to bytes
MultiFetcher = Callable[[list[int, int]], bytes]  # Maps [(start, end)] to bytes


class BaseCache:
    """Pass-through cache: doesn't keep anything, calls every time

    Acts as base class for other cachers

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: func
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    """

    name: ClassVar[str] = "none"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        self.blocksize = blocksize
        self.nblocks = 0
        self.fetcher = fetcher
        self.size = size
        self.hit_count = 0
        self.miss_count = 0
        # the bytes that we actually requested
        self.total_requested_bytes = 0

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        if start is None:
            start = 0
        if stop is None:
            stop = self.size
        if start >= self.size or start >= stop:
            return b""
        return self.fetcher(start, stop)

    def _reset_stats(self) -> None:
        """Reset hit and miss counts for a more granular report e.g. by file."""
        self.hit_count = 0
        self.miss_count = 0
        self.total_requested_bytes = 0

    def _log_stats(self) -> str:
        """Return a formatted string of the cache statistics."""
        if self.hit_count == 0 and self.miss_count == 0:
            # a cache that does nothing, this is for logs only
            return ""
        return f" , {self.name}: {self.hit_count} hits, {self.miss_count} misses, {self.total_requested_bytes} total requested bytes"

    def __repr__(self) -> str:
        # TODO: use rich for better formatting
        return f"""
        <{self.__class__.__name__}:
            block size  :   {self.blocksize}
            block count :   {self.nblocks}
            file size   :   {self.size}
            cache hits  :   {self.hit_count}
            cache misses:   {self.miss_count}
            total requested bytes: {self.total_requested_bytes}>
        """
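A small sketch (not part of fsspec) of the `_fetch` contract that every subclass below extends: a fetcher is any `f(start, stop) -> bytes`, and the base class simply passes requests through while normalising bounds.

```python
from fsspec.caching import BaseCache

data = bytes(range(256))  # illustrative in-memory payload

def fetcher(start: int, stop: int) -> bytes:  # matches the Fetcher signature
    return data[start:stop]

c = BaseCache(blocksize=32, fetcher=fetcher, size=len(data))
assert c._fetch(0, 8) == data[:8]    # pass-through: every call hits the fetcher
assert c._fetch(300, 310) == b""     # out-of-range reads collapse to b""
assert c._fetch(None, None) == data  # None bounds default to (0, size)
```
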
class MMapCache(BaseCache):
    """memory-mapped sparse file cache

    Opens temporary file, which is filled blocks-wise when data is requested.
    Ensure there is enough disc space in the temporary location.

    This cache method might only work on posix

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: Fetcher
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    location: str
        Where to create the temporary file. If None, a temporary file is
        created using tempfile.TemporaryFile().
    blocks: set[int]
        Set of block numbers that have already been fetched. If None, an empty
        set is created.
    multi_fetcher: MultiFetcher
        Function of the form f([(start, end)]) which gets bytes from remote
        as specified. This function is used to fetch multiple blocks at once.
        If not specified, the fetcher function is used instead.
    """

    name = "mmap"

    def __init__(
        self,
        blocksize: int,
        fetcher: Fetcher,
        size: int,
        location: str | None = None,
        blocks: set[int] | None = None,
        multi_fetcher: MultiFetcher | None = None,
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.blocks = set() if blocks is None else blocks
        self.location = location
        self.multi_fetcher = multi_fetcher
        self.cache = self._makefile()

    def _makefile(self) -> mmap.mmap | bytearray:
        import mmap
        import tempfile

        if self.size == 0:
            return bytearray()

        # posix version
        if self.location is None or not os.path.exists(self.location):
            if self.location is None:
                fd = tempfile.TemporaryFile()
                self.blocks = set()
            else:
                fd = open(self.location, "wb+")
            fd.seek(self.size - 1)
            fd.write(b"1")
            fd.flush()
        else:
            fd = open(self.location, "r+b")

        return mmap.mmap(fd.fileno(), self.size)

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        logger.debug(f"MMap cache fetching {start}-{end}")
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        start_block = start // self.blocksize
        end_block = end // self.blocksize
        block_range = range(start_block, end_block + 1)
        # Determine which blocks need to be fetched. This sequence is sorted by construction.
        need = (i for i in block_range if i not in self.blocks)
        # Count the number of blocks already cached
        self.hit_count += sum(1 for i in block_range if i in self.blocks)

        ranges = []

        # Consolidate needed blocks.
        # Algorithm adapted from Python 2.x itertools documentation.
        # We are grouping an enumerated sequence of blocks. By comparing the difference
        # between an ascending range (provided by enumerate) and the needed block numbers
        # we can detect when the block number skips values. The key computes this difference.
        # Whenever the difference changes, we know that we have previously cached block(s),
        # and a new group is started. In other words, this algorithm neatly groups
        # runs of consecutive block numbers so they can be fetched together.
        for _, _blocks in groupby(enumerate(need), key=lambda x: x[0] - x[1]):
            # Extract the blocks from the enumerated sequence
            _blocks = tuple(map(itemgetter(1), _blocks))
            # Compute start of first block
            sstart = _blocks[0] * self.blocksize
            # Compute the end of the last block. Last block may not be full size.
            send = min(_blocks[-1] * self.blocksize + self.blocksize, self.size)

            # Fetch bytes (could be multiple consecutive blocks)
            self.total_requested_bytes += send - sstart
            logger.debug(
                f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})"
            )
            ranges.append((sstart, send))

            # Update set of cached blocks
            self.blocks.update(_blocks)
            # Update cache statistics with number of blocks we had to cache
            self.miss_count += len(_blocks)

        if not ranges:
            return self.cache[start:end]

        if self.multi_fetcher:
            logger.debug(f"MMap get blocks {ranges}")
            for idx, r in enumerate(self.multi_fetcher(ranges)):
                (sstart, send) = ranges[idx]
                logger.debug(f"MMap copy block ({sstart}-{send})")
                self.cache[sstart:send] = r
        else:
            for sstart, send in ranges:
                logger.debug(f"MMap get block ({sstart}-{send})")
                self.cache[sstart:send] = self.fetcher(sstart, send)

        return self.cache[start:end]

    def __getstate__(self) -> dict[str, Any]:
        state = self.__dict__.copy()
        # Remove the unpicklable entries.
        del state["cache"]
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        # Restore instance attributes
        self.__dict__.update(state)
        self.cache = self._makefile()
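The comment block in `_fetch` above describes the enumerate/groupby consolidation trick; a standalone sketch of just that idiom (the `need` values are invented for illustration): consecutive block numbers keep a constant difference from their enumeration index, so each group is exactly one contiguous run.

```python
from itertools import groupby
from operator import itemgetter

need = [2, 3, 4, 9, 10, 15]  # block numbers still missing from the cache
runs = [
    tuple(map(itemgetter(1), grp))
    for _, grp in groupby(enumerate(need), key=lambda x: x[0] - x[1])
]
print(runs)  # [(2, 3, 4), (9, 10), (15,)] -> three consolidated fetches
```
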
class ReadAheadCache(BaseCache):
    """Cache which reads only when we get beyond a block of data

    This is a much simpler version of BytesCache, and does not attempt to
    fill holes in the cache or keep fragments alive. It is best suited to
    many small reads in a sequential order (e.g., reading lines from a file).
    """

    name = "readahead"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        super().__init__(blocksize, fetcher, size)
        self.cache = b""
        self.start = 0
        self.end = 0

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        if start is None:
            start = 0
        if end is None or end > self.size:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        l = end - start
        if start >= self.start and end <= self.end:
            # cache hit
            self.hit_count += 1
            return self.cache[start - self.start : end - self.start]
        elif self.start <= start < self.end:
            # partial hit
            self.miss_count += 1
            part = self.cache[start - self.start :]
            l -= len(part)
            start = self.end
        else:
            # miss
            self.miss_count += 1
            part = b""
        end = min(self.size, end + self.blocksize)
        self.total_requested_bytes += end - start
        self.cache = self.fetcher(start, end)  # new block replaces old
        self.start = start
        self.end = self.start + len(self.cache)
        return part + self.cache[:l]


class FirstChunkCache(BaseCache):
    """Caches the first block of a file only

    This may be useful for file types where the metadata is stored in the header,
    but is randomly accessed.
    """

    name = "first"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        if blocksize > size:
            # this will buffer the whole thing
            blocksize = size
        super().__init__(blocksize, fetcher, size)
        self.cache: bytes | None = None

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        start = start or 0
        if start > self.size:
            logger.debug("FirstChunkCache: requested start > file size")
            return b""

        end = min(end, self.size)

        if start < self.blocksize:
            if self.cache is None:
                self.miss_count += 1
                if end > self.blocksize:
                    self.total_requested_bytes += end
                    data = self.fetcher(0, end)
                    self.cache = data[: self.blocksize]
                    return data[start:]
                self.cache = self.fetcher(0, self.blocksize)
                self.total_requested_bytes += self.blocksize
            part = self.cache[start:end]
            if end > self.blocksize:
                self.total_requested_bytes += end - self.blocksize
                part += self.fetcher(self.blocksize, end)
            self.hit_count += 1
            return part
        else:
            self.miss_count += 1
            self.total_requested_bytes += end - start
            return self.fetcher(start, end)


class BlockCache(BaseCache):
    """
    Cache holding memory as a set of blocks.

    Requests are only ever made ``blocksize`` at a time, and are
    stored in an LRU cache. The least recently accessed block is
    discarded when more than ``maxblocks`` are stored.

    Parameters
    ----------
    blocksize : int
        The number of bytes to store in each block.
        Requests are only ever made for ``blocksize``, so this
        should balance the overhead of making a request against
        the granularity of the blocks.
    fetcher : Callable
    size : int
        The total size of the file being cached.
    maxblocks : int
        The maximum number of blocks to cache for. The maximum memory
        use for this cache is then ``blocksize * maxblocks``.
    """

    name = "blockcache"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.nblocks = math.ceil(size / blocksize)
        self.maxblocks = maxblocks
        self._fetch_block_cached = functools.lru_cache(maxblocks)(self._fetch_block)

    def cache_info(self):
        """
        The statistics on the block cache.

        Returns
        -------
        NamedTuple
            Returned directly from the LRU Cache used internally.
        """
        return self._fetch_block_cached.cache_info()

    def __getstate__(self) -> dict[str, Any]:
        state = self.__dict__
        del state["_fetch_block_cached"]
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        self.__dict__.update(state)
        self._fetch_block_cached = functools.lru_cache(state["maxblocks"])(
            self._fetch_block
        )

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""

        # byte position -> block numbers
        start_block_number = start // self.blocksize
        end_block_number = end // self.blocksize

        # these are cached, so safe to do multiple calls for the same start and end.
        for block_number in range(start_block_number, end_block_number + 1):
            self._fetch_block_cached(block_number)

        return self._read_cache(
            start,
            end,
            start_block_number=start_block_number,
            end_block_number=end_block_number,
        )

    def _fetch_block(self, block_number: int) -> bytes:
        """
        Fetch the block of data for `block_number`.
        """
        if block_number > self.nblocks:
            raise ValueError(
                f"'block_number={block_number}' is greater than "
                f"the number of blocks ({self.nblocks})"
            )

        start = block_number * self.blocksize
        end = start + self.blocksize
        self.total_requested_bytes += end - start
        self.miss_count += 1
        logger.info("BlockCache fetching block %d", block_number)
        block_contents = super()._fetch(start, end)
        return block_contents

    def _read_cache(
        self, start: int, end: int, start_block_number: int, end_block_number: int
    ) -> bytes:
        """
        Read from our block cache.

        Parameters
        ----------
        start, end : int
            The start and end byte positions.
        start_block_number, end_block_number : int
            The start and end block numbers.
        """
        start_pos = start % self.blocksize
        end_pos = end % self.blocksize

        self.hit_count += 1
        if start_block_number == end_block_number:
            block: bytes = self._fetch_block_cached(start_block_number)
            return block[start_pos:end_pos]

        else:
            # read from the initial
            out = [self._fetch_block_cached(start_block_number)[start_pos:]]

            # intermediate blocks
            # Note: it'd be nice to combine these into one big request. However
            # that doesn't play nicely with our LRU cache.
            out.extend(
                map(
                    self._fetch_block_cached,
                    range(start_block_number + 1, end_block_number),
                )
            )

            # final block
            out.append(self._fetch_block_cached(end_block_number)[:end_pos])

            return b"".join(out)


class BytesCache(BaseCache):
    """Cache which holds data in an in-memory bytes object

    Implements read-ahead by the block size, for semi-random reads progressing
    through the file.

    Parameters
    ----------
    trim: bool
        As we read more data, whether to discard the start of the buffer when
        we are more than a blocksize ahead of it.
    """

    name: ClassVar[str] = "bytes"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, trim: bool = True
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.cache = b""
        self.start: int | None = None
        self.end: int | None = None
        self.trim = trim

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        # TODO: only set start/end after fetch, in case it fails?
        # is this where retry logic might go?
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        if (
            self.start is not None
            and start >= self.start
            and self.end is not None
            and end < self.end
        ):
            # cache hit: we have all the required data
            offset = start - self.start
            self.hit_count += 1
            return self.cache[offset : offset + end - start]

        if self.blocksize:
            bend = min(self.size, end + self.blocksize)
        else:
            bend = end

        if bend == start or start > self.size:
            return b""

        if (self.start is None or start < self.start) and (
            self.end is None or end > self.end
        ):
            # First read, or extending both before and after
            self.total_requested_bytes += bend - start
            self.miss_count += 1
            self.cache = self.fetcher(start, bend)
            self.start = start
        else:
            assert self.start is not None
            assert self.end is not None
            self.miss_count += 1

            if start < self.start:
                if self.end is None or self.end - end > self.blocksize:
                    self.total_requested_bytes += bend - start
                    self.cache = self.fetcher(start, bend)
                    self.start = start
                else:
                    self.total_requested_bytes += self.start - start
                    new = self.fetcher(start, self.start)
                    self.start = start
                    self.cache = new + self.cache
            elif self.end is not None and bend > self.end:
                if self.end > self.size:
                    pass
                elif end - self.end > self.blocksize:
                    self.total_requested_bytes += bend - start
                    self.cache = self.fetcher(start, bend)
                    self.start = start
                else:
                    self.total_requested_bytes += bend - self.end
                    new = self.fetcher(self.end, bend)
                    self.cache = self.cache + new

        self.end = self.start + len(self.cache)
        offset = start - self.start
        out = self.cache[offset : offset + end - start]
        if self.trim:
            num = (self.end - self.start) // (self.blocksize + 1)
            if num > 1:
                self.start += self.blocksize * num
                self.cache = self.cache[self.blocksize * num :]
        return out

    def __len__(self) -> int:
        return len(self.cache)


class AllBytes(BaseCache):
    """Cache entire contents of the file"""

    name: ClassVar[str] = "all"

    def __init__(
        self,
        blocksize: int | None = None,
        fetcher: Fetcher | None = None,
        size: int | None = None,
        data: bytes | None = None,
    ) -> None:
        super().__init__(blocksize, fetcher, size)  # type: ignore[arg-type]
        if data is None:
            self.miss_count += 1
            self.total_requested_bytes += self.size
            data = self.fetcher(0, self.size)
        self.data = data

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        self.hit_count += 1
        return self.data[start:stop]


class KnownPartsOfAFile(BaseCache):
    """
    Cache holding known file parts.

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: func
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    data: dict
        A dictionary mapping explicit `(start, stop)` file-offset tuples
        with known bytes.
    strict: bool, default True
        Whether to fetch reads that go beyond a known byte-range boundary.
        If `False`, any read that ends outside a known part will be zero
        padded. Note that zero padding will not be used for reads that
        begin outside a known byte-range.
    """

    name: ClassVar[str] = "parts"

    def __init__(
        self,
        blocksize: int,
        fetcher: Fetcher,
        size: int,
        data: Optional[dict[tuple[int, int], bytes]] = None,
        strict: bool = True,
        **_: Any,
    ):
        super().__init__(blocksize, fetcher, size)
        self.strict = strict

        # simple consolidation of contiguous blocks
        if data:
            old_offsets = sorted(data.keys())
            offsets = [old_offsets[0]]
            blocks = [data.pop(old_offsets[0])]
            for start, stop in old_offsets[1:]:
                start0, stop0 = offsets[-1]
                if start == stop0:
                    offsets[-1] = (start0, stop)
                    blocks[-1] += data.pop((start, stop))
                else:
                    offsets.append((start, stop))
                    blocks.append(data.pop((start, stop)))

            self.data = dict(zip(offsets, blocks))
        else:
            self.data = {}

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        if start is None:
            start = 0
        if stop is None:
            stop = self.size

        out = b""
        for (loc0, loc1), data in self.data.items():
            # If self.strict=False, use zero-padded data
            # for reads beyond the end of a "known" buffer
            if loc0 <= start < loc1:
                off = start - loc0
                out = data[off : off + stop - start]
                if not self.strict or loc0 <= stop <= loc1:
                    # The request is within a known range, or
                    # it begins within a known range, and we
                    # are allowed to pad reads beyond the
                    # buffer with zero
                    out += b"\x00" * (stop - start - len(out))
                    self.hit_count += 1
                    return out
                else:
                    # The request ends outside a known range,
                    # and we are being "strict" about reads
                    # beyond the buffer
                    start = loc1
                    break

        # We only get here if there is a request outside the
        # known parts of the file. In an ideal world, this
        # should never happen
        if self.fetcher is None:
            # We cannot fetch the data, so raise an error
            raise ValueError(f"Read is outside the known file parts: {(start, stop)}. ")
        # We can fetch the data, but should warn the user
        # that this may be slow
        warnings.warn(
            f"Read is outside the known file parts: {(start, stop)}. "
            f"IO/caching performance may be poor!"
        )
        logger.debug(f"KnownPartsOfAFile cache fetching {start}-{stop}")
        self.total_requested_bytes += stop - start
        self.miss_count += 1
        return out + super()._fetch(start, stop)
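A sketch (not part of fsspec) of seeding `KnownPartsOfAFile` with byte ranges we already hold, so reads inside them never touch the fetcher; the ranges and payloads are invented for illustration.

```python
from fsspec.caching import KnownPartsOfAFile

known = {(0, 5): b"hello", (5, 11): b" world"}  # contiguous: gets consolidated
kc = KnownPartsOfAFile(blocksize=0, fetcher=None, size=11, data=known)
# the two adjacent ranges were merged in __init__, so this whole read is a hit
assert kc._fetch(0, 11) == b"hello world"
```

With `fetcher=None`, any read straying outside the known parts raises, which is the "ideal world" case the comments above describe.
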
class UpdatableLRU(Generic[P, T]):
    """
    Custom implementation of LRU cache that allows updating keys

    Used by BackgroundBlockCache
    """

    class CacheInfo(NamedTuple):
        hits: int
        misses: int
        maxsize: int
        currsize: int

    def __init__(self, func: Callable[P, T], max_size: int = 128) -> None:
        self._cache: OrderedDict[Any, T] = collections.OrderedDict()
        self._func = func
        self._max_size = max_size
        self._hits = 0
        self._misses = 0
        self._lock = threading.Lock()

    def __call__(self, *args: P.args, **kwargs: P.kwargs) -> T:
        if kwargs:
            raise TypeError(f"Got unexpected keyword argument {kwargs.keys()}")
        with self._lock:
            if args in self._cache:
                self._cache.move_to_end(args)
                self._hits += 1
                return self._cache[args]

        result = self._func(*args, **kwargs)

        with self._lock:
            self._cache[args] = result
            self._misses += 1
            if len(self._cache) > self._max_size:
                self._cache.popitem(last=False)

        return result

    def is_key_cached(self, *args: Any) -> bool:
        with self._lock:
            return args in self._cache

    def add_key(self, result: T, *args: Any) -> None:
        with self._lock:
            self._cache[args] = result
            if len(self._cache) > self._max_size:
                self._cache.popitem(last=False)

    def cache_info(self) -> UpdatableLRU.CacheInfo:
        with self._lock:
            return self.CacheInfo(
                maxsize=self._max_size,
                currsize=len(self._cache),
                hits=self._hits,
                misses=self._misses,
            )


class BackgroundBlockCache(BaseCache):
    """
    Cache holding memory as a set of blocks with pre-loading of
    the next block in the background.

    Requests are only ever made ``blocksize`` at a time, and are
    stored in an LRU cache. The least recently accessed block is
    discarded when more than ``maxblocks`` are stored. If the
    next block is not in cache, it is loaded in a separate thread
    in non-blocking way.

    Parameters
    ----------
    blocksize : int
        The number of bytes to store in each block.
        Requests are only ever made for ``blocksize``, so this
        should balance the overhead of making a request against
        the granularity of the blocks.
    fetcher : Callable
    size : int
        The total size of the file being cached.
    maxblocks : int
        The maximum number of blocks to cache for. The maximum memory
        use for this cache is then ``blocksize * maxblocks``.
    """

    name: ClassVar[str] = "background"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.nblocks = math.ceil(size / blocksize)
        self.maxblocks = maxblocks
        self._fetch_block_cached = UpdatableLRU(self._fetch_block, maxblocks)

        self._thread_executor = ThreadPoolExecutor(max_workers=1)
        self._fetch_future_block_number: int | None = None
        self._fetch_future: Future[bytes] | None = None
        self._fetch_future_lock = threading.Lock()

    def cache_info(self) -> UpdatableLRU.CacheInfo:
        """
        The statistics on the block cache.

        Returns
        -------
        NamedTuple
            Returned directly from the LRU Cache used internally.
        """
        return self._fetch_block_cached.cache_info()

    def __getstate__(self) -> dict[str, Any]:
        state = self.__dict__
        del state["_fetch_block_cached"]
        del state["_thread_executor"]
        del state["_fetch_future_block_number"]
        del state["_fetch_future"]
        del state["_fetch_future_lock"]
        return state

    def __setstate__(self, state) -> None:
        self.__dict__.update(state)
        self._fetch_block_cached = UpdatableLRU(self._fetch_block, state["maxblocks"])
        self._thread_executor = ThreadPoolExecutor(max_workers=1)
        self._fetch_future_block_number = None
        self._fetch_future = None
        self._fetch_future_lock = threading.Lock()

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""

        # byte position -> block numbers
        start_block_number = start // self.blocksize
        end_block_number = end // self.blocksize

        fetch_future_block_number = None
        fetch_future = None
        with self._fetch_future_lock:
            # Background thread is running. Check whether we can or must join it.
            if self._fetch_future is not None:
                assert self._fetch_future_block_number is not None
                if self._fetch_future.done():
                    logger.info("BlockCache joined background fetch without waiting.")
                    self._fetch_block_cached.add_key(
                        self._fetch_future.result(), self._fetch_future_block_number
                    )
                    # Cleanup the fetch variables. Done with fetching the block.
                    self._fetch_future_block_number = None
                    self._fetch_future = None
                else:
                    # Must join if we need the block for the current fetch
                    must_join = bool(
                        start_block_number
                        <= self._fetch_future_block_number
                        <= end_block_number
                    )
                    if must_join:
                        # Copy to the local variables to release lock
                        # before waiting for result
                        fetch_future_block_number = self._fetch_future_block_number
                        fetch_future = self._fetch_future

                        # Cleanup the fetch variables. Have a local copy.
                        self._fetch_future_block_number = None
                        self._fetch_future = None

        # Need to wait for the future for the current read
        if fetch_future is not None:
            logger.info("BlockCache waiting for background fetch.")
            # Wait until result and put it in cache
            self._fetch_block_cached.add_key(
                fetch_future.result(), fetch_future_block_number
            )

        # these are cached, so safe to do multiple calls for the same start and end.
        for block_number in range(start_block_number, end_block_number + 1):
            self._fetch_block_cached(block_number)

        # fetch next block in the background if nothing is running in the background,
        # the block is within file and it is not already cached
        end_block_plus_1 = end_block_number + 1
        with self._fetch_future_lock:
            if (
                self._fetch_future is None
                and end_block_plus_1 <= self.nblocks
                and not self._fetch_block_cached.is_key_cached(end_block_plus_1)
            ):
                self._fetch_future_block_number = end_block_plus_1
                self._fetch_future = self._thread_executor.submit(
                    self._fetch_block, end_block_plus_1, "async"
                )

        return self._read_cache(
            start,
            end,
            start_block_number=start_block_number,
            end_block_number=end_block_number,
        )

    def _fetch_block(self, block_number: int, log_info: str = "sync") -> bytes:
        """
        Fetch the block of data for `block_number`.
        """
        if block_number > self.nblocks:
            raise ValueError(
                f"'block_number={block_number}' is greater than "
                f"the number of blocks ({self.nblocks})"
            )

        start = block_number * self.blocksize
        end = start + self.blocksize
        logger.info("BlockCache fetching block (%s) %d", log_info, block_number)
        self.total_requested_bytes += end - start
        self.miss_count += 1
        block_contents = super()._fetch(start, end)
        return block_contents

    def _read_cache(
        self, start: int, end: int, start_block_number: int, end_block_number: int
    ) -> bytes:
        """
        Read from our block cache.

        Parameters
        ----------
        start, end : int
            The start and end byte positions.
        start_block_number, end_block_number : int
            The start and end block numbers.
        """
        start_pos = start % self.blocksize
        end_pos = end % self.blocksize

        # kind of pointless to count this as a hit, but it is
        self.hit_count += 1

        if start_block_number == end_block_number:
            block = self._fetch_block_cached(start_block_number)
            return block[start_pos:end_pos]

        else:
            # read from the initial
            out = [self._fetch_block_cached(start_block_number)[start_pos:]]

            # intermediate blocks
            # Note: it'd be nice to combine these into one big request. However
            # that doesn't play nicely with our LRU cache.
            out.extend(
                map(
                    self._fetch_block_cached,
                    range(start_block_number + 1, end_block_number),
                )
            )

            # final block
            out.append(self._fetch_block_cached(end_block_number)[:end_pos])

            return b"".join(out)


caches: dict[str | None, type[BaseCache]] = {
    # one custom case
    None: BaseCache,
}


def register_cache(cls: type[BaseCache], clobber: bool = False) -> None:
    """'Register' cache implementation.

    Parameters
    ----------
    clobber: bool, optional
        If set to True (default is False) - allow to overwrite existing
        entry.

    Raises
    ------
    ValueError
    """
    name = cls.name
    if not clobber and name in caches:
        raise ValueError(f"Cache with name {name!r} is already known: {caches[name]}")
    caches[name] = cls


for c in (
    BaseCache,
    MMapCache,
    BytesCache,
    ReadAheadCache,
    BlockCache,
    FirstChunkCache,
    AllBytes,
    KnownPartsOfAFile,
    BackgroundBlockCache,
):
    register_cache(c)
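A sketch (not part of fsspec) of extending the `caches` registry above with a custom implementation; `LoggingCache` is an invented name, and the `name` attribute is the key under which buffered files can later look the class up (e.g. via a `cache_type=` argument).

```python
from fsspec.caching import BaseCache, caches, register_cache


class LoggingCache(BaseCache):  # hypothetical example cache
    name = "logging"

    def _fetch(self, start, stop):
        # report every range request before delegating to the pass-through base
        print(f"fetch {start}-{stop}")
        return super()._fetch(start, stop)


register_cache(LoggingCache)
assert caches["logging"] is LoggingCache
```

Registering the same name twice raises ValueError unless `clobber=True`, mirroring the loop at the end of the module.
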
venv/lib/python3.12/site-packages/fsspec/callbacks.py
ADDED
@@ -0,0 +1,324 @@
from functools import wraps


class Callback:
    """
    Base class and interface for callback mechanism

    This class can be used directly for monitoring file transfers by
    providing ``callback=Callback(hooks=...)`` (see the ``hooks`` argument,
    below), or subclassed for more specialised behaviour.

    Parameters
    ----------
    size: int (optional)
        Nominal quantity for the value that corresponds to a complete
        transfer, e.g., total number of tiles or total number of
        bytes
    value: int (0)
        Starting internal counter value
    hooks: dict or None
        A dict of named functions to be called on each update. The signature
        of these must be ``f(size, value, **kwargs)``
    """

    def __init__(self, size=None, value=0, hooks=None, **kwargs):
        self.size = size
        self.value = value
        self.hooks = hooks or {}
        self.kw = kwargs

    def __enter__(self):
        return self

    def __exit__(self, *exc_args):
        self.close()

    def close(self):
        """Close callback."""

    def branched(self, path_1, path_2, **kwargs):
        """
        Return callback for child transfers

        If this callback is operating at a higher level, e.g., put, which may
        trigger transfers that can also be monitored. The function returns a callback
        that has to be passed to the child method, e.g., put_file,
        as `callback=` argument.

        The implementation uses `callback.branch` for compatibility.
        When implementing callbacks, it is recommended to override this function instead
        of `branch` and avoid calling `super().branched(...)`.

        Prefer using this function over `branch`.

        Parameters
        ----------
        path_1: str
            Child's source path
        path_2: str
            Child's destination path
        **kwargs:
            Arbitrary keyword arguments

        Returns
        -------
        callback: Callback
            A callback instance to be passed to the child method
        """
        self.branch(path_1, path_2, kwargs)
        # mutate kwargs so that we can force the caller to pass "callback=" explicitly
        return kwargs.pop("callback", DEFAULT_CALLBACK)

    def branch_coro(self, fn):
        """
        Wraps a coroutine, and passes a new child callback to it.
        """

        @wraps(fn)
        async def func(path1, path2: str, **kwargs):
            with self.branched(path1, path2, **kwargs) as child:
                return await fn(path1, path2, callback=child, **kwargs)

        return func

    def set_size(self, size):
        """
        Set the internal maximum size attribute

        Usually called if not initially set at instantiation. Note that this
        triggers a ``call()``.

        Parameters
        ----------
        size: int
        """
        self.size = size
        self.call()

    def absolute_update(self, value):
        """
        Set the internal value state

        Triggers ``call()``

        Parameters
        ----------
        value: int
        """
        self.value = value
        self.call()

    def relative_update(self, inc=1):
        """
        Delta increment the internal counter

        Triggers ``call()``

        Parameters
        ----------
        inc: int
        """
        self.value += inc
        self.call()

    def call(self, hook_name=None, **kwargs):
        """
        Execute hook(s) with current state

        Each function is passed the internal size and current value

        Parameters
        ----------
        hook_name: str or None
            If given, execute on this hook
        kwargs: passed on to (all) hook(s)
        """
        if not self.hooks:
            return
        kw = self.kw.copy()
        kw.update(kwargs)
        if hook_name:
            if hook_name not in self.hooks:
                return
            return self.hooks[hook_name](self.size, self.value, **kw)
        for hook in self.hooks.values() or []:
            hook(self.size, self.value, **kw)

    def wrap(self, iterable):
        """
        Wrap an iterable to call ``relative_update`` on each iteration

        Parameters
        ----------
        iterable: Iterable
            The iterable that is being wrapped
        """
        for item in iterable:
            self.relative_update()
            yield item

    def branch(self, path_1, path_2, kwargs):
        """
        Set callbacks for child transfers

        If this callback is operating at a higher level, e.g., put, which may
        trigger transfers that can also be monitored. The passed kwargs are
        to be *mutated* to add ``callback=``, if this class supports branching
        to children.

        Parameters
        ----------
        path_1: str
            Child's source path
        path_2: str
            Child's destination path
        kwargs: dict
            arguments passed to child method, e.g., put_file.

        Returns
        -------

        """
        return None

    def no_op(self, *_, **__):
        pass

    def __getattr__(self, item):
        """
        If undefined methods are called on this class, nothing happens
        """
        return self.no_op

    @classmethod
    def as_callback(cls, maybe_callback=None):
        """Transform callback=... into Callback instance

        For the special value of ``None``, return the global instance of
        ``NoOpCallback``. This is an alternative to including
        ``callback=DEFAULT_CALLBACK`` directly in a method signature.
        """
        if maybe_callback is None:
            return DEFAULT_CALLBACK
        return maybe_callback
class NoOpCallback(Callback):
|
208 |
+
"""
|
209 |
+
This implementation of Callback does exactly nothing
|
210 |
+
"""
|
211 |
+
|
212 |
+
def call(self, *args, **kwargs):
|
213 |
+
return None
|
214 |
+
|
215 |
+
|
216 |
+
class DotPrinterCallback(Callback):
|
217 |
+
"""
|
218 |
+
Simple example Callback implementation
|
219 |
+
|
220 |
+
Almost identical to Callback with a hook that prints a char; here we
|
221 |
+
demonstrate how the outer layer may print "#" and the inner layer "."
|
222 |
+
"""
|
223 |
+
|
224 |
+
def __init__(self, chr_to_print="#", **kwargs):
|
225 |
+
self.chr = chr_to_print
|
226 |
+
super().__init__(**kwargs)
|
227 |
+
|
228 |
+
def branch(self, path_1, path_2, kwargs):
|
229 |
+
"""Mutate kwargs to add new instance with different print char"""
|
230 |
+
kwargs["callback"] = DotPrinterCallback(".")
|
231 |
+
|
232 |
+
def call(self, **kwargs):
|
233 |
+
"""Just outputs a character"""
|
234 |
+
print(self.chr, end="")
|
235 |
+
|
236 |
+
|
237 |
+
class TqdmCallback(Callback):
|
238 |
+
"""
|
239 |
+
A callback to display a progress bar using tqdm
|
240 |
+
|
241 |
+
Parameters
|
242 |
+
----------
|
243 |
+
tqdm_kwargs : dict, (optional)
|
244 |
+
Any argument accepted by the tqdm constructor.
|
245 |
+
See the `tqdm doc <https://tqdm.github.io/docs/tqdm/#__init__>`_.
|
246 |
+
Will be forwarded to `tqdm_cls`.
|
247 |
+
tqdm_cls: (optional)
|
248 |
+
subclass of `tqdm.tqdm`. If not passed, it will default to `tqdm.tqdm`.
|
249 |
+
|
250 |
+
Examples
|
251 |
+
--------
|
252 |
+
>>> import fsspec
|
253 |
+
>>> from fsspec.callbacks import TqdmCallback
|
254 |
+
>>> fs = fsspec.filesystem("memory")
|
255 |
+
>>> path2distant_data = "/your-path"
|
256 |
+
>>> fs.upload(
|
257 |
+
".",
|
258 |
+
path2distant_data,
|
259 |
+
recursive=True,
|
260 |
+
callback=TqdmCallback(),
|
261 |
+
)
|
262 |
+
|
263 |
+
You can forward args to tqdm using the ``tqdm_kwargs`` parameter.
|
264 |
+
|
265 |
+
>>> fs.upload(
|
266 |
+
".",
|
267 |
+
path2distant_data,
|
268 |
+
recursive=True,
|
269 |
+
callback=TqdmCallback(tqdm_kwargs={"desc": "Your tqdm description"}),
|
270 |
+
)
|
271 |
+
|
272 |
+
You can also customize the progress bar by passing a subclass of `tqdm`.
|
273 |
+
|
274 |
+
.. code-block:: python
|
275 |
+
|
276 |
+
class TqdmFormat(tqdm):
|
277 |
+
'''Provides a `total_time` format parameter'''
|
278 |
+
@property
|
279 |
+
def format_dict(self):
|
280 |
+
d = super().format_dict
|
281 |
+
total_time = d["elapsed"] * (d["total"] or 0) / max(d["n"], 1)
|
282 |
+
d.update(total_time=self.format_interval(total_time) + " in total")
|
283 |
+
return d
|
284 |
+
|
285 |
+
>>> with TqdmCallback(
|
286 |
+
tqdm_kwargs={
|
287 |
+
"desc": "desc",
|
288 |
+
"bar_format": "{total_time}: {percentage:.0f}%|{bar}{r_bar}",
|
289 |
+
},
|
290 |
+
tqdm_cls=TqdmFormat,
|
291 |
+
) as callback:
|
292 |
+
fs.upload(".", path2distant_data, recursive=True, callback=callback)
|
293 |
+
"""
|
294 |
+
|
295 |
+
def __init__(self, tqdm_kwargs=None, *args, **kwargs):
|
296 |
+
try:
|
297 |
+
from tqdm import tqdm
|
298 |
+
|
299 |
+
except ImportError as exce:
|
300 |
+
raise ImportError(
|
301 |
+
"Using TqdmCallback requires tqdm to be installed"
|
302 |
+
) from exce
|
303 |
+
|
304 |
+
self._tqdm_cls = kwargs.pop("tqdm_cls", tqdm)
|
305 |
+
self._tqdm_kwargs = tqdm_kwargs or {}
|
306 |
+
self.tqdm = None
|
307 |
+
super().__init__(*args, **kwargs)
|
308 |
+
|
309 |
+
def call(self, *args, **kwargs):
|
310 |
+
if self.tqdm is None:
|
311 |
+
self.tqdm = self._tqdm_cls(total=self.size, **self._tqdm_kwargs)
|
312 |
+
self.tqdm.total = self.size
|
313 |
+
self.tqdm.update(self.value - self.tqdm.n)
|
314 |
+
|
315 |
+
def close(self):
|
316 |
+
if self.tqdm is not None:
|
317 |
+
self.tqdm.close()
|
318 |
+
self.tqdm = None
|
319 |
+
|
320 |
+
def __del__(self):
|
321 |
+
return self.close()
|
322 |
+
|
323 |
+
|
324 |
+
DEFAULT_CALLBACK = _DEFAULT_CALLBACK = NoOpCallback()
|
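
The hook mechanism above is easiest to see end-to-end with a hand-rolled callback. A minimal sketch, assuming the ``Callback`` constructor accepts ``size=`` and ``hooks=`` as in the class defined in this file; the hook name ``report`` and the item list are invented for illustration:

    from fsspec.callbacks import Callback

    def report(size, value):
        # every hook receives (size, value) plus any stored kwargs
        print(f"{value}/{size}")

    cb = Callback(size=3, hooks={"report": report})
    for item in cb.wrap(["a", "b", "c"]):  # wrap() fires relative_update() per item
        pass  # prints 1/3, 2/3, 3/3
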
venv/lib/python3.12/site-packages/fsspec/compression.py
ADDED
@@ -0,0 +1,175 @@
"""Helper functions for a standard streaming compression API"""

from zipfile import ZipFile

import fsspec.utils
from fsspec.spec import AbstractBufferedFile


def noop_file(file, mode, **kwargs):
    return file


# TODO: files should also be available as contexts
# should be functions of the form func(infile, mode=, **kwargs) -> file-like
compr = {None: noop_file}


def register_compression(name, callback, extensions, force=False):
    """Register an "inferable" file compression type.

    Registers transparent file compression type for use with fsspec.open.
    Compression can be specified by name in open, or "infer"-ed for any files
    ending with the given extensions.

    Args:
        name: (str) The compression type name. Eg. "gzip".
        callback: A callable of form (infile, mode, **kwargs) -> file-like.
            Accepts an input file-like object, the target mode and kwargs.
            Returns a wrapped file-like object.
        extensions: (str, Iterable[str]) A file extension, or list of file
            extensions for which to infer this compression scheme. Eg. "gz".
        force: (bool) Force re-registration of compression type or extensions.

    Raises:
        ValueError: If name or extensions already registered, and not force.

    """
    if isinstance(extensions, str):
        extensions = [extensions]

    # Validate registration
    if name in compr and not force:
        raise ValueError(f"Duplicate compression registration: {name}")

    for ext in extensions:
        if ext in fsspec.utils.compressions and not force:
            raise ValueError(f"Duplicate compression file extension: {ext} ({name})")

    compr[name] = callback

    for ext in extensions:
        fsspec.utils.compressions[ext] = name


def unzip(infile, mode="rb", filename=None, **kwargs):
    if "r" not in mode:
        filename = filename or "file"
        z = ZipFile(infile, mode="w", **kwargs)
        fo = z.open(filename, mode="w")
        fo.close = lambda closer=fo.close: closer() or z.close()
        return fo
    z = ZipFile(infile)
    if filename is None:
        filename = z.namelist()[0]
    return z.open(filename, mode="r", **kwargs)


register_compression("zip", unzip, "zip")

try:
    from bz2 import BZ2File
except ImportError:
    pass
else:
    register_compression("bz2", BZ2File, "bz2")

try:  # pragma: no cover
    from isal import igzip

    def isal(infile, mode="rb", **kwargs):
        return igzip.IGzipFile(fileobj=infile, mode=mode, **kwargs)

    register_compression("gzip", isal, "gz")
except ImportError:
    from gzip import GzipFile

    register_compression(
        "gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), "gz"
    )

try:
    from lzma import LZMAFile

    register_compression("lzma", LZMAFile, "lzma")
    register_compression("xz", LZMAFile, "xz")
except ImportError:
    pass

try:
    import lzmaffi

    register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True)
    register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
except ImportError:
    pass


class SnappyFile(AbstractBufferedFile):
    def __init__(self, infile, mode, **kwargs):
        import snappy

        super().__init__(
            fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs
        )
        self.infile = infile
        if "r" in mode:
            self.codec = snappy.StreamDecompressor()
        else:
            self.codec = snappy.StreamCompressor()

    def _upload_chunk(self, final=False):
        self.buffer.seek(0)
        out = self.codec.add_chunk(self.buffer.read())
        self.infile.write(out)
        return True

    def seek(self, loc, whence=0):
        raise NotImplementedError("SnappyFile is not seekable")

    def seekable(self):
        return False

    def _fetch_range(self, start, end):
        """Get the specified set of bytes from remote"""
        data = self.infile.read(end - start)
        return self.codec.decompress(data)


try:
    import snappy

    snappy.compress(b"")
    # Snappy may use the .sz file extension, but this is not part of the
    # standard implementation.
    register_compression("snappy", SnappyFile, [])

except (ImportError, NameError, AttributeError):
    pass

try:
    import lz4.frame

    register_compression("lz4", lz4.frame.open, "lz4")
except ImportError:
    pass

try:
    import zstandard as zstd

    def zstandard_file(infile, mode="rb"):
        if "r" in mode:
            cctx = zstd.ZstdDecompressor()
            return cctx.stream_reader(infile)
        else:
            cctx = zstd.ZstdCompressor(level=10)
            return cctx.stream_writer(infile)

    register_compression("zstd", zstandard_file, "zst")
except ImportError:
    pass


def available_compressions():
    """Return a list of the implemented compressions."""
    return list(compr)
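
As a usage illustration of ``register_compression``, a sketch that registers a pass-through codec; both the ``identity`` name and the ``.raw`` extension are invented for the example:

    import fsspec
    from fsspec.compression import register_compression

    def identity(infile, mode="rb", **kwargs):
        # a real codec would return a wrapping file-like; this one does nothing
        return infile

    register_compression("identity", identity, "raw")

    # compression="infer" now maps the .raw extension to the new codec
    with fsspec.open("memory://data.raw", "wb", compression="infer") as f:
        f.write(b"hello")
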
venv/lib/python3.12/site-packages/fsspec/config.py
ADDED
@@ -0,0 +1,131 @@
from __future__ import annotations

import configparser
import json
import os
import warnings
from typing import Any

conf: dict[str, dict[str, Any]] = {}
default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/fsspec")
conf_dir = os.environ.get("FSSPEC_CONFIG_DIR", default_conf_dir)


def set_conf_env(conf_dict, envdict=os.environ):
    """Set config values from environment variables

    Looks for variables of the form ``FSSPEC_<protocol>`` and
    ``FSSPEC_<protocol>_<kwarg>``. For ``FSSPEC_<protocol>`` the value is parsed
    as a json dictionary and used to ``update`` the config of the
    corresponding protocol. For ``FSSPEC_<protocol>_<kwarg>`` there is no
    attempt to convert the string value, but the kwarg keys will be lower-cased.

    The ``FSSPEC_<protocol>_<kwarg>`` variables are applied after the
    ``FSSPEC_<protocol>`` ones.

    Parameters
    ----------
    conf_dict : dict(str, dict)
        This dict will be mutated
    envdict : dict-like(str, str)
        Source for the values - usually the real environment
    """
    kwarg_keys = []
    for key in envdict:
        if key.startswith("FSSPEC_") and len(key) > 7 and key[7] != "_":
            if key.count("_") > 1:
                kwarg_keys.append(key)
                continue
            try:
                value = json.loads(envdict[key])
            except json.decoder.JSONDecodeError as ex:
                warnings.warn(
                    f"Ignoring environment variable {key} due to a parse failure: {ex}"
                )
            else:
                if isinstance(value, dict):
                    _, proto = key.split("_", 1)
                    conf_dict.setdefault(proto.lower(), {}).update(value)
                else:
                    warnings.warn(
                        f"Ignoring environment variable {key} due to not being a dict:"
                        f" {type(value)}"
                    )
        elif key.startswith("FSSPEC"):
            warnings.warn(
                f"Ignoring environment variable {key} due to having an unexpected name"
            )

    for key in kwarg_keys:
        _, proto, kwarg = key.split("_", 2)
        conf_dict.setdefault(proto.lower(), {})[kwarg.lower()] = envdict[key]


def set_conf_files(cdir, conf_dict):
    """Set config values from files

    Scans for INI and JSON files in the given directory, and uses their
    contents to set the config. In case of repeated values, later values
    win.

    In the case of INI files, all values are strings, and these will not
    be converted.

    Parameters
    ----------
    cdir : str
        Directory to search
    conf_dict : dict(str, dict)
        This dict will be mutated
    """
    if not os.path.isdir(cdir):
        return
    allfiles = sorted(os.listdir(cdir))
    for fn in allfiles:
        if fn.endswith(".ini"):
            ini = configparser.ConfigParser()
            ini.read(os.path.join(cdir, fn))
            for key in ini:
                if key == "DEFAULT":
                    continue
                conf_dict.setdefault(key, {}).update(dict(ini[key]))
        if fn.endswith(".json"):
            with open(os.path.join(cdir, fn)) as f:
                js = json.load(f)
            for key in js:
                conf_dict.setdefault(key, {}).update(dict(js[key]))


def apply_config(cls, kwargs, conf_dict=None):
    """Supply default values for kwargs when instantiating class

    Augments the passed kwargs, by finding entries in the config dict
    which match the class's ``.protocol`` attribute (one or more str)

    Parameters
    ----------
    cls : file system implementation
    kwargs : dict
    conf_dict : dict of dict
        Typically this is the global configuration

    Returns
    -------
    dict : the modified set of kwargs
    """
    if conf_dict is None:
        conf_dict = conf
    protos = cls.protocol if isinstance(cls.protocol, (tuple, list)) else [cls.protocol]
    kw = {}
    for proto in protos:
        # default kwargs from the current state of the config
        if proto in conf_dict:
            kw.update(conf_dict[proto])
    # explicit kwargs always win
    kw.update(**kwargs)
    kwargs = kw
    return kwargs


set_conf_files(conf_dir, conf)
set_conf_env(conf)
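
To make the precedence rules concrete, a sketch driving ``set_conf_env`` with a fake environment dict; the ``webhdfs`` protocol and values here are invented:

    from fsspec.config import set_conf_env

    conf = {}
    env = {
        "FSSPEC_WEBHDFS": '{"port": 80}',    # JSON dict form, parsed
        "FSSPEC_WEBHDFS_USER": "alice",      # kwarg form, kept as a string, applied last
    }
    set_conf_env(conf, envdict=env)
    print(conf)  # {'webhdfs': {'port': 80, 'user': 'alice'}}
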
venv/lib/python3.12/site-packages/fsspec/conftest.py
ADDED
@@ -0,0 +1,55 @@
import os
import shutil
import subprocess
import sys
import time

import pytest

import fsspec
from fsspec.implementations.cached import CachingFileSystem


@pytest.fixture()
def m():
    """
    Fixture providing a memory filesystem.
    """
    m = fsspec.filesystem("memory")
    m.store.clear()
    m.pseudo_dirs.clear()
    m.pseudo_dirs.append("")
    try:
        yield m
    finally:
        m.store.clear()
        m.pseudo_dirs.clear()
        m.pseudo_dirs.append("")


@pytest.fixture
def ftp_writable(tmpdir):
    """
    Fixture providing a writable FTP filesystem.
    """
    pytest.importorskip("pyftpdlib")
    from fsspec.implementations.ftp import FTPFileSystem

    FTPFileSystem.clear_instance_cache()  # remove lingering connections
    CachingFileSystem.clear_instance_cache()
    d = str(tmpdir)
    with open(os.path.join(d, "out"), "wb") as f:
        f.write(b"hello" * 10000)
    P = subprocess.Popen(
        [sys.executable, "-m", "pyftpdlib", "-d", d, "-u", "user", "-P", "pass", "-w"]
    )
    try:
        time.sleep(1)
        yield "localhost", 2121, "user", "pass"
    finally:
        P.terminate()
        P.wait()
        try:
            shutil.rmtree(tmpdir)
        except Exception:
            pass
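
A sketch of a test consuming the ``m`` fixture above; the test body is illustrative, not part of the suite:

    def test_roundtrip(m):
        with m.open("/hello.txt", "wb") as f:
            f.write(b"hi")
        assert m.cat("/hello.txt") == b"hi"
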
venv/lib/python3.12/site-packages/fsspec/core.py
ADDED
@@ -0,0 +1,743 @@
from __future__ import annotations

import io
import logging
import os
import re
from glob import has_magic
from pathlib import Path

# for backwards compat, we export cache things from here too
from fsspec.caching import (  # noqa: F401
    BaseCache,
    BlockCache,
    BytesCache,
    MMapCache,
    ReadAheadCache,
    caches,
)
from fsspec.compression import compr
from fsspec.config import conf
from fsspec.registry import filesystem, get_filesystem_class
from fsspec.utils import (
    _unstrip_protocol,
    build_name_function,
    infer_compression,
    stringify_path,
)

logger = logging.getLogger("fsspec")


class OpenFile:
    """
    File-like object to be used in a context

    Can layer (buffered) text-mode and compression over any file-system, which
    are typically binary-only.

    These instances are safe to serialize, as the low-level file object
    is not created until invoked using ``with``.

    Parameters
    ----------
    fs: FileSystem
        The file system to use for opening the file. Should be a subclass or duck-type
        with ``fsspec.spec.AbstractFileSystem``
    path: str
        Location to open
    mode: str like 'rb', optional
        Mode of the opened file
    compression: str or None, optional
        Compression to apply
    encoding: str or None, optional
        The encoding to use if opened in text mode.
    errors: str or None, optional
        How to handle encoding errors if opened in text mode.
    newline: None or str
        Passed to TextIOWrapper in text mode, how to handle line endings.
    autoopen: bool
        If True, calls open() immediately. Mostly used by pickle
    pos: int
        If given and autoopen is True, seek to this location immediately
    """

    def __init__(
        self,
        fs,
        path,
        mode="rb",
        compression=None,
        encoding=None,
        errors=None,
        newline=None,
    ):
        self.fs = fs
        self.path = path
        self.mode = mode
        self.compression = get_compression(path, compression)
        self.encoding = encoding
        self.errors = errors
        self.newline = newline
        self.fobjects = []

    def __reduce__(self):
        return (
            OpenFile,
            (
                self.fs,
                self.path,
                self.mode,
                self.compression,
                self.encoding,
                self.errors,
                self.newline,
            ),
        )

    def __repr__(self):
        return f"<OpenFile '{self.path}'>"

    def __enter__(self):
        mode = self.mode.replace("t", "").replace("b", "") + "b"

        try:
            f = self.fs.open(self.path, mode=mode)
        except FileNotFoundError as e:
            if has_magic(self.path):
                raise FileNotFoundError(
                    "%s not found. The URL contains glob characters: you maybe needed\n"
                    "to pass expand=True in fsspec.open() or the storage_options of \n"
                    "your library. You can also set the config value 'open_expand'\n"
                    "before import, or fsspec.core.DEFAULT_EXPAND at runtime, to True.",
                    self.path,
                ) from e
            raise

        self.fobjects = [f]

        if self.compression is not None:
            compress = compr[self.compression]
            f = compress(f, mode=mode[0])
            self.fobjects.append(f)

        if "b" not in self.mode:
            # assume, for example, that 'r' is equivalent to 'rt' as in builtin
            f = PickleableTextIOWrapper(
                f, encoding=self.encoding, errors=self.errors, newline=self.newline
            )
            self.fobjects.append(f)

        return self.fobjects[-1]

    def __exit__(self, *args):
        self.close()

    @property
    def full_name(self):
        return _unstrip_protocol(self.path, self.fs)

    def open(self):
        """Materialise this as a real open file without context

        The OpenFile object should be explicitly closed to avoid enclosed file
        instances persisting. You must, therefore, keep a reference to the OpenFile
        during the life of the file-like it generates.
        """
        return self.__enter__()

    def close(self):
        """Close all encapsulated file objects"""
        for f in reversed(self.fobjects):
            if "r" not in self.mode and not f.closed:
                f.flush()
            f.close()
        self.fobjects.clear()


class OpenFiles(list):
    """List of OpenFile instances

    Can be used in a single context, which opens and closes all of the
    contained files. Normal list access to get the elements works as
    normal.

    A special case is made for caching filesystems - the files will
    be down/uploaded together at the start or end of the context, and
    this may happen concurrently, if the target filesystem supports it.
    """

    def __init__(self, *args, mode="rb", fs=None):
        self.mode = mode
        self.fs = fs
        self.files = []
        super().__init__(*args)

    def __enter__(self):
        if self.fs is None:
            raise ValueError("Context has already been used")

        fs = self.fs
        while True:
            if hasattr(fs, "open_many"):
                # check for concurrent cache download; or set up for upload
                self.files = fs.open_many(self)
                return self.files
            if hasattr(fs, "fs") and fs.fs is not None:
                fs = fs.fs
            else:
                break
        return [s.__enter__() for s in self]

    def __exit__(self, *args):
        fs = self.fs
        [s.__exit__(*args) for s in self]
        if "r" not in self.mode:
            while True:
                if hasattr(fs, "open_many"):
                    # check for concurrent cache upload
                    fs.commit_many(self.files)
                    return
                if hasattr(fs, "fs") and fs.fs is not None:
                    fs = fs.fs
                else:
                    break

    def __getitem__(self, item):
        out = super().__getitem__(item)
        if isinstance(item, slice):
            return OpenFiles(out, mode=self.mode, fs=self.fs)
        return out

    def __repr__(self):
        return f"<List of {len(self)} OpenFile instances>"


def open_files(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    name_function=None,
    num=1,
    protocol=None,
    newline=None,
    auto_mkdir=True,
    expand=True,
    **kwargs,
):
    """Given a path or paths, return a list of ``OpenFile`` objects.

    For writing, a str path must contain the "*" character, which will be filled
    in by increasing numbers, e.g., "part*" -> "part1", "part2" if num=2.

    For either reading or writing, can instead provide explicit list of paths.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
        to read from alternative filesystems. To read from multiple files you
        can pass a globstring or a list of paths, with the caveat that they
        must all have the same protocol.
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    name_function: function or None
        if opening a set of files for writing, those files do not yet exist,
        so we need to generate their names by formatting the urlpath for
        each sequence number
    num: int [1]
        if writing mode, number of files we expect to create (passed to
        name_function)
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    auto_mkdir: bool (True)
        If in write mode, this will ensure the target directory exists before
        writing, by calling ``fs.mkdirs(exist_ok=True)``.
    expand: bool
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> files = open_files('2015-*-*.csv')  # doctest: +SKIP
    >>> files = open_files(
    ...     's3://bucket/2015-*-*.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP

    Returns
    -------
    An ``OpenFiles`` instance, which is a list of ``OpenFile`` objects that can
    be used as a single context

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    fs, fs_token, paths = get_fs_token_paths(
        urlpath,
        mode,
        num=num,
        name_function=name_function,
        storage_options=kwargs,
        protocol=protocol,
        expand=expand,
    )
    if fs.protocol == "file":
        fs.auto_mkdir = auto_mkdir
    elif "r" not in mode and auto_mkdir:
        parents = {fs._parent(path) for path in paths}
        for parent in parents:
            try:
                fs.makedirs(parent, exist_ok=True)
            except PermissionError:
                pass
    return OpenFiles(
        [
            OpenFile(
                fs,
                path,
                mode=mode,
                compression=compression,
                encoding=encoding,
                errors=errors,
                newline=newline,
            )
            for path in paths
        ],
        mode=mode,
        fs=fs,
    )


def _un_chain(path, kwargs):
    # Avoid a circular import
    from fsspec.implementations.cached import CachingFileSystem

    if "::" in path:
        x = re.compile(".*[^a-z]+.*")  # test for non protocol-like single word
        bits = []
        for p in path.split("::"):
            if "://" in p or x.match(p):
                bits.append(p)
            else:
                bits.append(p + "://")
    else:
        bits = [path]
    # [[url, protocol, kwargs], ...]
    out = []
    previous_bit = None
    kwargs = kwargs.copy()
    for bit in reversed(bits):
        protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file"
        cls = get_filesystem_class(protocol)
        extra_kwargs = cls._get_kwargs_from_urls(bit)
        kws = kwargs.pop(protocol, {})
        if bit is bits[0]:
            kws.update(kwargs)
        kw = dict(
            **{k: v for k, v in extra_kwargs.items() if k not in kws or v != kws[k]},
            **kws,
        )
        bit = cls._strip_protocol(bit)
        if "target_protocol" not in kw and issubclass(cls, CachingFileSystem):
            bit = previous_bit
        out.append((bit, protocol, kw))
        previous_bit = bit
    out.reverse()
    return out


def url_to_fs(url, **kwargs):
    """
    Turn fully-qualified and potentially chained URL into filesystem instance

    Parameters
    ----------
    url : str
        The fsspec-compatible URL
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Returns
    -------
    filesystem : FileSystem
        The new filesystem discovered from ``url`` and created with
        ``**kwargs``.
    urlpath : str
        The file-systems-specific URL for ``url``.
    """
    url = stringify_path(url)
    # non-FS arguments that appear in fsspec.open()
    # inspect could keep this in sync with open()'s signature
    known_kwargs = {
        "compression",
        "encoding",
        "errors",
        "expand",
        "mode",
        "name_function",
        "newline",
        "num",
    }
    kwargs = {k: v for k, v in kwargs.items() if k not in known_kwargs}
    chain = _un_chain(url, kwargs)
    inkwargs = {}
    # Reverse iterate the chain, creating a nested target_* structure
    for i, ch in enumerate(reversed(chain)):
        urls, protocol, kw = ch
        if i == len(chain) - 1:
            inkwargs = dict(**kw, **inkwargs)
            continue
        inkwargs["target_options"] = dict(**kw, **inkwargs)
        inkwargs["target_protocol"] = protocol
        inkwargs["fo"] = urls
    urlpath, protocol, _ = chain[0]
    fs = filesystem(protocol, **inkwargs)
    return fs, urlpath


DEFAULT_EXPAND = conf.get("open_expand", False)


def open(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    protocol=None,
    newline=None,
    expand=None,
    **kwargs,
):
    """Given a path or paths, return one ``OpenFile`` object.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath. Prefix with a protocol like ``s3://``
        to read from alternative filesystems. Should not include glob
        character(s).
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    expand: bool or None
        Whether to regard file paths containing special glob characters as needing
        expansion (finding the first match) or absolute. Setting False allows using
        paths which do embed such characters. If None (default), this argument
        takes its value from the DEFAULT_EXPAND module variable, which takes
        its initial value from the "open_expand" config value at startup, which will
        be False if not set.
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> openfile = open('2015-01-01.csv')  # doctest: +SKIP
    >>> openfile = open(
    ...     's3://bucket/2015-01-01.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP
    >>> with openfile as f:
    ...     df = pd.read_csv(f)  # doctest: +SKIP
    ...

    Returns
    -------
    ``OpenFile`` object.

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    expand = DEFAULT_EXPAND if expand is None else expand
    out = open_files(
        urlpath=[urlpath],
        mode=mode,
        compression=compression,
        encoding=encoding,
        errors=errors,
        protocol=protocol,
        newline=newline,
        expand=expand,
        **kwargs,
    )
    if not out:
        raise FileNotFoundError(urlpath)
    return out[0]


def open_local(
    url: str | list[str] | Path | list[Path],
    mode: str = "rb",
    **storage_options: dict,
) -> str | list[str]:
    """Open file(s) which can be resolved to local

    For files which either are local, or get downloaded upon open
    (e.g., by file caching)

    Parameters
    ----------
    url: str or list(str)
    mode: str
        Must be read mode
    storage_options:
        passed on to FS for or used by open_files (e.g., compression)
    """
    if "r" not in mode:
        raise ValueError("Can only ensure local files when reading")
    of = open_files(url, mode=mode, **storage_options)
    if not getattr(of[0].fs, "local_file", False):
        raise ValueError(
            "open_local can only be used on a filesystem which"
            " has attribute local_file=True"
        )
    with of as files:
        paths = [f.name for f in files]
    if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
        return paths[0]
    return paths


def get_compression(urlpath, compression):
    if compression == "infer":
        compression = infer_compression(urlpath)
    if compression is not None and compression not in compr:
        raise ValueError(f"Compression type {compression} not supported")
    return compression


def split_protocol(urlpath):
    """Return protocol, path pair"""
    urlpath = stringify_path(urlpath)
    if "://" in urlpath:
        protocol, path = urlpath.split("://", 1)
        if len(protocol) > 1:
            # excludes Windows paths
            return protocol, path
    if urlpath.startswith("data:"):
        return urlpath.split(":", 1)
    return None, urlpath


def strip_protocol(urlpath):
    """Return only path part of full URL, according to appropriate backend"""
    protocol, _ = split_protocol(urlpath)
    cls = get_filesystem_class(protocol)
    return cls._strip_protocol(urlpath)


def expand_paths_if_needed(paths, mode, num, fs, name_function):
    """Expand paths if they have a ``*`` in them (write mode) or any of ``*?[]``
    in them (read mode).

    :param paths: list of paths
    mode: str
        Mode in which to open files.
    num: int
        If opening in writing mode, number of files we expect to create.
    fs: filesystem object
    name_function: callable
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.
    :return: list of paths
    """
    expanded_paths = []
    paths = list(paths)

    if "w" in mode:  # write mode
        if sum(1 for p in paths if "*" in p) > 1:
            raise ValueError(
                "When writing data, only one filename mask can be specified."
            )
        num = max(num, len(paths))

        for curr_path in paths:
            if "*" in curr_path:
                # expand using name_function
                expanded_paths.extend(_expand_paths(curr_path, name_function, num))
            else:
                expanded_paths.append(curr_path)
        # if we generated more paths than asked for, trim the list
        if len(expanded_paths) > num:
            expanded_paths = expanded_paths[:num]

    else:  # read mode
        for curr_path in paths:
            if has_magic(curr_path):
                # expand using glob
                expanded_paths.extend(fs.glob(curr_path))
            else:
                expanded_paths.append(curr_path)

    return expanded_paths


def get_fs_token_paths(
    urlpath,
    mode="rb",
    num=1,
    name_function=None,
    storage_options=None,
    protocol=None,
    expand=True,
):
    """Filesystem, deterministic token, and paths from a urlpath and options.

    Parameters
    ----------
    urlpath: string or iterable
        Absolute or relative filepath, URL (may include protocols like
        ``s3://``), or globstring pointing to data.
    mode: str, optional
        Mode in which to open files.
    num: int, optional
        If opening in writing mode, number of files we expect to create.
    name_function: callable, optional
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.
    storage_options: dict, optional
        Additional keywords to pass to the filesystem class.
    protocol: str or None
        To override the protocol specifier in the URL
    expand: bool
        Expand string paths for writing, assuming the path is a directory
    """
    if isinstance(urlpath, (list, tuple, set)):
        if not urlpath:
            raise ValueError("empty urlpath sequence")
        urlpath0 = stringify_path(next(iter(urlpath)))
    else:
        urlpath0 = stringify_path(urlpath)
    storage_options = storage_options or {}
    if protocol:
        storage_options["protocol"] = protocol
    chain = _un_chain(urlpath0, storage_options or {})
    inkwargs = {}
    # Reverse iterate the chain, creating a nested target_* structure
    for i, ch in enumerate(reversed(chain)):
        urls, nested_protocol, kw = ch
        if i == len(chain) - 1:
            inkwargs = dict(**kw, **inkwargs)
            continue
        inkwargs["target_options"] = dict(**kw, **inkwargs)
        inkwargs["target_protocol"] = nested_protocol
        inkwargs["fo"] = urls
    paths, protocol, _ = chain[0]
    fs = filesystem(protocol, **inkwargs)
    if isinstance(urlpath, (list, tuple, set)):
        pchains = [
            _un_chain(stringify_path(u), storage_options or {})[0] for u in urlpath
        ]
        if len({pc[1] for pc in pchains}) > 1:
            raise ValueError(f"Protocol mismatch getting fs from {urlpath}")
        paths = [pc[0] for pc in pchains]
    else:
        paths = fs._strip_protocol(paths)
    if isinstance(paths, (list, tuple, set)):
        if expand:
            paths = expand_paths_if_needed(paths, mode, num, fs, name_function)
        elif not isinstance(paths, list):
            paths = list(paths)
    else:
        if ("w" in mode or "x" in mode) and expand:
            paths = _expand_paths(paths, name_function, num)
        elif "*" in paths:
            paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
        else:
            paths = [paths]

    return fs, fs._fs_token, paths


def _expand_paths(path, name_function, num):
    if isinstance(path, str):
        if path.count("*") > 1:
            raise ValueError("Output path spec must contain exactly one '*'.")
        elif "*" not in path:
            path = os.path.join(path, "*.part")

        if name_function is None:
            name_function = build_name_function(num - 1)

        paths = [path.replace("*", name_function(i)) for i in range(num)]
        if paths != sorted(paths):
            logger.warning(
                "In order to preserve order between partitions"
                " paths created with ``name_function`` should "
                "sort to partition order"
            )
    elif isinstance(path, (tuple, list)):
        assert len(path) == num
        paths = list(path)
    else:
        raise ValueError(
            "Path should be either\n"
            "1. A list of paths: ['foo.json', 'bar.json', ...]\n"
            "2. A directory: 'foo/'\n"
            "3. A path with a '*' in it: 'foo.*.json'"
        )
    return paths


class PickleableTextIOWrapper(io.TextIOWrapper):
    """TextIOWrapper cannot be pickled. This solves it.

    Requires that ``buffer`` be pickleable, which all instances of
    AbstractBufferedFile are.
    """

    def __init__(
        self,
        buffer,
        encoding=None,
        errors=None,
        newline=None,
        line_buffering=False,
        write_through=False,
    ):
        self.args = buffer, encoding, errors, newline, line_buffering, write_through
        super().__init__(*self.args)

    def __reduce__(self):
        return PickleableTextIOWrapper, self.args
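
To make the ``OpenFile`` lifecycle and the ``::`` chaining handled by ``_un_chain`` concrete, a sketch using the in-memory filesystem so it runs anywhere; the paths are illustrative:

    import fsspec
    from fsspec.core import url_to_fs

    # open() returns a serializable OpenFile; the low-level file is only
    # created when the context is entered
    with fsspec.open("memory://logs/a.txt", "wt") as f:
        f.write("hello")

    fs, path = url_to_fs("memory://logs/a.txt")
    print(fs.protocol, path)  # memory /logs/a.txt

    # a chained URL: the outer caching filesystem receives the inner one
    # via the target_protocol/target_options structure built above
    fs2, path2 = url_to_fs("simplecache::memory://logs/a.txt")
    print(type(fs2).__name__)  # SimpleCacheFileSystem
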
venv/lib/python3.12/site-packages/fsspec/dircache.py
ADDED
@@ -0,0 +1,98 @@
import time
from collections.abc import MutableMapping
from functools import lru_cache


class DirCache(MutableMapping):
    """
    Caching of directory listings, in a structure like::

        {"path0": [
            {"name": "path0/file0",
             "size": 123,
             "type": "file",
             ...
            },
            {"name": "path0/file1",
            },
            ...
            ],
         "path1": [...]
        }

    Parameters to this class control listing expiry or indeed turn
    caching off
    """

    def __init__(
        self,
        use_listings_cache=True,
        listings_expiry_time=None,
        max_paths=None,
        **kwargs,
    ):
        """

        Parameters
        ----------
        use_listings_cache: bool
            If False, this cache never returns items, but always reports KeyError,
            and setting items has no effect
        listings_expiry_time: int or float (optional)
            Time in seconds that a listing is considered valid. If None,
            listings do not expire.
        max_paths: int (optional)
            The number of most recent listings that are considered valid; 'recent'
            refers to when the entry was set.
        """
        self._cache = {}
        self._times = {}
        if max_paths:
            self._q = lru_cache(max_paths + 1)(lambda key: self._cache.pop(key, None))
        self.use_listings_cache = use_listings_cache
        self.listings_expiry_time = listings_expiry_time
        self.max_paths = max_paths

    def __getitem__(self, item):
        if self.listings_expiry_time is not None:
            if self._times.get(item, 0) - time.time() < -self.listings_expiry_time:
                del self._cache[item]
        if self.max_paths:
            self._q(item)
        return self._cache[item]  # maybe raises KeyError

    def clear(self):
        self._cache.clear()

    def __len__(self):
        return len(self._cache)

    def __contains__(self, item):
        try:
            self[item]
            return True
        except KeyError:
            return False

    def __setitem__(self, key, value):
        if not self.use_listings_cache:
            return
        if self.max_paths:
            self._q(key)
        self._cache[key] = value
        if self.listings_expiry_time is not None:
            self._times[key] = time.time()

    def __delitem__(self, key):
        del self._cache[key]

    def __iter__(self):
        entries = list(self._cache)

        return (k for k in entries if k in self)

    def __reduce__(self):
        return (
            DirCache,
            (self.use_listings_cache, self.listings_expiry_time, self.max_paths),
        )
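
A sketch of the expiry behaviour: entries disappear once ``listings_expiry_time`` has elapsed; the listing content here is invented:

    import time
    from fsspec.dircache import DirCache

    cache = DirCache(listings_expiry_time=0.1)
    cache["/data"] = [{"name": "/data/file0", "size": 1, "type": "file"}]
    assert "/data" in cache
    time.sleep(0.2)
    assert "/data" not in cache  # __getitem__ evicted the stale entry
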
venv/lib/python3.12/site-packages/fsspec/exceptions.py
ADDED
@@ -0,0 +1,18 @@
"""
fsspec user-defined exception classes
"""

import asyncio


class BlocksizeMismatchError(ValueError):
    """
    Raised when a cached file is opened with a different blocksize than it was
    written with
    """


class FSTimeoutError(asyncio.TimeoutError):
    """
    Raised when an fsspec function timeout occurs
    """
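
Since ``FSTimeoutError`` subclasses ``asyncio.TimeoutError``, existing handlers keep working; a minimal sketch:

    import asyncio
    from fsspec.exceptions import FSTimeoutError

    try:
        raise FSTimeoutError("operation took too long")
    except asyncio.TimeoutError:
        print("caught via the asyncio base class")
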
venv/lib/python3.12/site-packages/fsspec/fuse.py
ADDED
@@ -0,0 +1,324 @@
import argparse
import logging
import os
import stat
import threading
import time
from errno import EIO, ENOENT

from fuse import FUSE, FuseOSError, LoggingMixIn, Operations

from fsspec import __version__
from fsspec.core import url_to_fs

logger = logging.getLogger("fsspec.fuse")


class FUSEr(Operations):
    def __init__(self, fs, path, ready_file=False):
        self.fs = fs
        self.cache = {}
        self.root = path.rstrip("/") + "/"
        self.counter = 0
        logger.info("Starting FUSE at %s", path)
        self._ready_file = ready_file

    def getattr(self, path, fh=None):
        logger.debug("getattr %s", path)
        if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]:
            return {"type": "file", "st_size": 5}

        path = "".join([self.root, path.lstrip("/")]).rstrip("/")
        try:
            info = self.fs.info(path)
        except FileNotFoundError as exc:
            raise FuseOSError(ENOENT) from exc

        data = {"st_uid": info.get("uid", 1000), "st_gid": info.get("gid", 1000)}
        perm = info.get("mode", 0o777)

        if info["type"] != "file":
            data["st_mode"] = stat.S_IFDIR | perm
            data["st_size"] = 0
            data["st_blksize"] = 0
        else:
            data["st_mode"] = stat.S_IFREG | perm
            data["st_size"] = info["size"]
            data["st_blksize"] = 5 * 2**20
            data["st_nlink"] = 1
        data["st_atime"] = info["atime"] if "atime" in info else time.time()
        data["st_ctime"] = info["ctime"] if "ctime" in info else time.time()
        data["st_mtime"] = info["mtime"] if "mtime" in info else time.time()
        return data

    def readdir(self, path, fh):
        logger.debug("readdir %s", path)
        path = "".join([self.root, path.lstrip("/")])
        files = self.fs.ls(path, False)
        files = [os.path.basename(f.rstrip("/")) for f in files]
        return [".", ".."] + files

    def mkdir(self, path, mode):
        path = "".join([self.root, path.lstrip("/")])
        self.fs.mkdir(path)
        return 0

    def rmdir(self, path):
        path = "".join([self.root, path.lstrip("/")])
        self.fs.rmdir(path)
        return 0

    def read(self, path, size, offset, fh):
        logger.debug("read %s", (path, size, offset))
        if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]:
            # status indicator
            return b"ready"

        f = self.cache[fh]
        f.seek(offset)
        out = f.read(size)
        return out

    def write(self, path, data, offset, fh):
        logger.debug("write %s", (path, offset))
        f = self.cache[fh]
        f.seek(offset)
        f.write(data)
        return len(data)

    def create(self, path, flags, fi=None):
        logger.debug("create %s", (path, flags))
        fn = "".join([self.root, path.lstrip("/")])
        self.fs.touch(fn)  # OS will want to get attributes immediately
        f = self.fs.open(fn, "wb")
        self.cache[self.counter] = f
        self.counter += 1
        return self.counter - 1

    def open(self, path, flags):
        logger.debug("open %s", (path, flags))
        fn = "".join([self.root, path.lstrip("/")])
        if flags % 2 == 0:
            # read
            mode = "rb"
        else:
            # write/create
            mode = "wb"
        self.cache[self.counter] = self.fs.open(fn, mode)
        self.counter += 1
        return self.counter - 1

    def truncate(self, path, length, fh=None):
        fn = "".join([self.root, path.lstrip("/")])
        if length != 0:
            raise NotImplementedError
        # maybe should be no-op since open with write sets size to zero anyway
        self.fs.touch(fn)

    def unlink(self, path):
        fn = "".join([self.root, path.lstrip("/")])
        try:
            self.fs.rm(fn, False)
        except (OSError, FileNotFoundError) as exc:
            raise FuseOSError(EIO) from exc

    def release(self, path, fh):
        try:
            if fh in self.cache:
                f = self.cache[fh]
                f.close()
                self.cache.pop(fh)
        except Exception as e:
            print(e)
        return 0

    def chmod(self, path, mode):
        if hasattr(self.fs, "chmod"):
            path = "".join([self.root, path.lstrip("/")])
            return self.fs.chmod(path, mode)
        raise NotImplementedError


def run(
    fs,
    path,
    mount_point,
    foreground=True,
    threads=False,
    ready_file=False,
    ops_class=FUSEr,
):
    """Mount stuff in a local directory

    This uses fusepy to make it appear as if a given path on an fsspec
    instance is in fact resident within the local file-system.

    This requires that fusepy be installed, and that FUSE be available on
    the system (typically requiring a package to be installed with
    apt, yum, brew, etc.).

    Parameters
    ----------
    fs: file-system instance
        From one of the compatible implementations
    path: str
        Location on that file-system to regard as the root directory to
        mount. Note that you typically should include the terminating "/"
        character.
    mount_point: str
        An empty directory on the local file-system where the contents of
        the remote path will appear.
    foreground: bool
        Whether or not calling this function will block. Operation will
        typically be more stable if True.
    threads: bool
        Whether or not to create threads when responding to file operations
        within the mounted directory. Operation will typically be more
        stable if False.
    ready_file: bool
        Whether the FUSE process is ready. The ``.fuse_ready`` file will
        exist in the ``mount_point`` directory if True. Debugging purpose.
    ops_class: FUSEr or subclass of FUSEr
        To override the default behavior of FUSEr. For example, logging
        to file.

    """
    func = lambda: FUSE(
        ops_class(fs, path, ready_file=ready_file),
        mount_point,
        nothreads=not threads,
        foreground=foreground,
    )
    if not foreground:
        th = threading.Thread(target=func)
        th.daemon = True
        th.start()
        return th
    else:  # pragma: no cover
        try:
            func()
        except KeyboardInterrupt:
            pass


def main(args):
    """Mount filesystem from chained URL to MOUNT_POINT.

    Examples:

    python3 -m fsspec.fuse memory /usr/share /tmp/mem

    python3 -m fsspec.fuse local /tmp/source /tmp/local \\
            -l /tmp/fsspecfuse.log

    You can also mount chained-URLs and use special settings:

    python3 -m fsspec.fuse 'filecache::zip::file://data.zip' \\
            / /tmp/zip \\
            -o 'filecache-cache_storage=/tmp/simplecache'

    You can specify the type of the setting by using `[int]` or `[bool]`,
    (`true`, `yes`, `1` represent the Boolean value `True`):

    python3 -m fsspec.fuse 'simplecache::ftp://ftp1.at.proftpd.org' \\
            /historic/packages/RPMS /tmp/ftp \\
            -o 'simplecache-cache_storage=/tmp/simplecache' \\
            -o 'simplecache-check_files=false[bool]' \\
            -o 'ftp-listings_expiry_time=60[int]' \\
            -o 'ftp-username=anonymous' \\
            -o 'ftp-password=xieyanbo'
    """

    class RawDescriptionArgumentParser(argparse.ArgumentParser):
        def format_help(self):
            usage = super().format_help()
            parts = usage.split("\n\n")
            parts[1] = self.description.rstrip()
            return "\n\n".join(parts)

    parser = RawDescriptionArgumentParser(prog="fsspec.fuse", description=main.__doc__)
    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument("url", type=str, help="fs url")
    parser.add_argument("source_path", type=str, help="source directory in fs")
    parser.add_argument("mount_point", type=str, help="local directory")
    parser.add_argument(
        "-o",
        "--option",
        action="append",
        help="Any options of protocol included in the chained URL",
    )
    parser.add_argument(
        "-l", "--log-file", type=str, help="Logging FUSE debug info (Default: '')"
    )
    parser.add_argument(
        "-f",
        "--foreground",
        action="store_false",
        help="Running in foreground or not (Default: False)",
    )
    parser.add_argument(
        "-t",
        "--threads",
        action="store_false",
        help="Running with threads support (Default: False)",
    )
    parser.add_argument(
        "-r",
        "--ready-file",
        action="store_false",
        help="The `.fuse_ready` file will exist after FUSE is ready. "
        "(Debugging purpose, Default: False)",
    )
    args = parser.parse_args(args)

    kwargs = {}
    for item in args.option or []:
        key, sep, value = item.partition("=")
        if not sep:
            parser.error(message=f"Wrong option: {item!r}")
        val = value.lower()
        if val.endswith("[int]"):
            value = int(value[: -len("[int]")])
        elif val.endswith("[bool]"):
            value = val[: -len("[bool]")] in ["1", "yes", "true"]

        if "-" in key:
            fs_name, setting_name = key.split("-", 1)
            if fs_name in kwargs:
                kwargs[fs_name][setting_name] = value
            else:
                kwargs[fs_name] = {setting_name: value}
        else:
            kwargs[key] = value

    if args.log_file:
        logging.basicConfig(
            level=logging.DEBUG,
            filename=args.log_file,
            format="%(asctime)s %(message)s",
        )

        class LoggingFUSEr(FUSEr, LoggingMixIn):
            pass

        fuser = LoggingFUSEr
    else:
        fuser = FUSEr

    fs, url_path = url_to_fs(args.url, **kwargs)
    logger.debug("Mounting %s to %s", url_path, str(args.mount_point))
    run(
        fs,
        args.source_path,
        args.mount_point,
        foreground=args.foreground,
        threads=args.threads,
        ready_file=args.ready_file,
        ops_class=fuser,
    )


if __name__ == "__main__":
    import sys

    main(sys.argv[1:])
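Usage note (not part of the vendored file): `run()` above is the entry point for mounting programmatically. The following is a minimal sketch, assuming fusepy and a system FUSE library are installed; the filesystem choice and paths are illustrative only, not taken from this diff.

# Sketch: mount an in-memory fsspec filesystem at a local directory.
# Assumes fusepy + libfuse are available; /tmp/mnt must be an existing
# empty directory. All names here are illustrative.
import fsspec
from fsspec.fuse import run

fs = fsspec.filesystem("memory")
fs.pipe_file("/data/hello.txt", b"hello world")

# foreground=False returns the daemon thread driving the FUSE loop,
# so the mount can be used from the same process.
th = run(fs, "/data", "/tmp/mnt", foreground=False)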
venv/lib/python3.12/site-packages/fsspec/generic.py
ADDED
@@ -0,0 +1,395 @@
from __future__ import annotations

import inspect
import logging
import os
import shutil
import uuid
from typing import Optional

from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper
from .callbacks import DEFAULT_CALLBACK
from .core import filesystem, get_filesystem_class, split_protocol, url_to_fs

_generic_fs = {}
logger = logging.getLogger("fsspec.generic")


def set_generic_fs(protocol, **storage_options):
    """Populate the dict used for method=="generic" lookups"""
    _generic_fs[protocol] = filesystem(protocol, **storage_options)


def _resolve_fs(url, method, protocol=None, storage_options=None):
    """Pick instance of backend FS"""
    url = url[0] if isinstance(url, (list, tuple)) else url
    protocol = protocol or split_protocol(url)[0]
    storage_options = storage_options or {}
    if method == "default":
        return filesystem(protocol)
    if method == "generic":
        return _generic_fs[protocol]
    if method == "current":
        cls = get_filesystem_class(protocol)
        return cls.current()
    if method == "options":
        fs, _ = url_to_fs(url, **storage_options.get(protocol, {}))
        return fs
    raise ValueError(f"Unknown FS resolution method: {method}")


def rsync(
    source,
    destination,
    delete_missing=False,
    source_field="size",
    dest_field="size",
    update_cond="different",
    inst_kwargs=None,
    fs=None,
    **kwargs,
):
    """Sync files between two directory trees

    (experimental)

    Parameters
    ----------
    source: str
        Root of the directory tree to take files from. This must be a directory, but
        do not include any terminating "/" character
    destination: str
        Root path to copy into. The contents of this location should be
        identical to the contents of ``source`` when done. This will be made a
        directory, and the terminal "/" should not be included.
    delete_missing: bool
        If there are paths in the destination that don't exist in the
        source and this is True, delete them. Otherwise, leave them alone.
    source_field: str | callable
        If ``update_cond`` is "different", this is the key in the info
        of source files to consider for difference. May be a function of the
        info dict.
    dest_field: str | callable
        If ``update_cond`` is "different", this is the key in the info
        of destination files to consider for difference. May be a function of
        the info dict.
    update_cond: "different"|"always"|"never"
        If "always", every file is copied, regardless of whether it exists in
        the destination. If "never", files that exist in the destination are
        not copied again. If "different" (default), only copy if the info
        fields given by ``source_field`` and ``dest_field`` (usually "size")
        are different. Other comparisons may be added in the future.
    inst_kwargs: dict|None
        If ``fs`` is None, use this set of keyword arguments to make a
        GenericFileSystem instance
    fs: GenericFileSystem|None
        Instance to use if explicitly given. The instance defines how to
        make downstream file system instances from paths.

    Returns
    -------
    dict of the copy operations that were performed, {source: destination}
    """
    fs = fs or GenericFileSystem(**(inst_kwargs or {}))
    source = fs._strip_protocol(source)
    destination = fs._strip_protocol(destination)
    allfiles = fs.find(source, withdirs=True, detail=True)
    if not fs.isdir(source):
        raise ValueError("Can only rsync on a directory")
    otherfiles = fs.find(destination, withdirs=True, detail=True)
    dirs = [
        a
        for a, v in allfiles.items()
        if v["type"] == "directory" and a.replace(source, destination) not in otherfiles
    ]
    logger.debug(f"{len(dirs)} directories to create")
    if dirs:
        fs.make_many_dirs(
            [dirn.replace(source, destination) for dirn in dirs], exist_ok=True
        )
    allfiles = {a: v for a, v in allfiles.items() if v["type"] == "file"}
    logger.debug(f"{len(allfiles)} files to consider for copy")
    to_delete = [
        o
        for o, v in otherfiles.items()
        if o.replace(destination, source) not in allfiles and v["type"] == "file"
    ]
    for k, v in allfiles.copy().items():
        otherfile = k.replace(source, destination)
        if otherfile in otherfiles:
            if update_cond == "always":
                allfiles[k] = otherfile
            elif update_cond == "different":
                inf1 = source_field(v) if callable(source_field) else v[source_field]
                v2 = otherfiles[otherfile]
                inf2 = dest_field(v2) if callable(dest_field) else v2[dest_field]
                if inf1 != inf2:
                    # details mismatch, make copy
                    allfiles[k] = otherfile
                else:
                    # details match, don't copy
                    allfiles.pop(k)
        else:
            # file not in target yet
            allfiles[k] = otherfile
    logger.debug(f"{len(allfiles)} files to copy")
    if allfiles:
        source_files, target_files = zip(*allfiles.items())
        fs.cp(source_files, target_files, **kwargs)
    logger.debug(f"{len(to_delete)} files to delete")
    if delete_missing and to_delete:
        fs.rm(to_delete)
    return allfiles


class GenericFileSystem(AsyncFileSystem):
    """Wrapper over all other FS types

    <experimental!>

    This implementation is a single unified interface to be able to run FS operations
    over generic URLs, and dispatch to the specific implementations using the URL
    protocol prefix.

    Note: instances of this FS are always async, even if you never use it with any
    async backend.
    """

    protocol = "generic"  # there is no real reason to ever use a protocol with this FS

    def __init__(self, default_method="default", storage_options=None, **kwargs):
        """

        Parameters
        ----------
        default_method: str (optional)
            Defines how to configure backend FS instances. Options are:

            - "default": instantiate like FSClass(), with no
              extra arguments; this is the default instance of that FS, and can be
              configured via the config system
            - "generic": takes instances from the `_generic_fs` dict in this module,
              which you must populate before use. Keys are by protocol
            - "options": expects storage_options, a dict mapping protocol to
              kwargs to use when constructing the filesystem
            - "current": takes the most recently instantiated version of each FS
        """
        self.method = default_method
        self.st_opts = storage_options
        super().__init__(**kwargs)

    def _parent(self, path):
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        return fs.unstrip_protocol(fs._parent(path))

    def _strip_protocol(self, path):
        # normalization only
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        return fs.unstrip_protocol(fs._strip_protocol(path))

    async def _find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        if fs.async_impl:
            out = await fs._find(
                path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
            )
        else:
            out = fs.find(
                path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
            )
        result = {}
        for k, v in out.items():
            v = v.copy()  # don't corrupt target FS dircache
            name = fs.unstrip_protocol(k)
            v["name"] = name
            result[name] = v
        if detail:
            return result
        return list(result)

    async def _info(self, url, **kwargs):
        fs = _resolve_fs(url, self.method)
        if fs.async_impl:
            out = await fs._info(url, **kwargs)
        else:
            out = fs.info(url, **kwargs)
        out = out.copy()  # don't edit originals
        out["name"] = fs.unstrip_protocol(out["name"])
        return out

    async def _ls(
        self,
        url,
        detail=True,
        **kwargs,
    ):
        fs = _resolve_fs(url, self.method)
        if fs.async_impl:
            out = await fs._ls(url, detail=True, **kwargs)
        else:
            out = fs.ls(url, detail=True, **kwargs)
        out = [o.copy() for o in out]  # don't edit originals
        for o in out:
            o["name"] = fs.unstrip_protocol(o["name"])
        if detail:
            return out
        else:
            return [o["name"] for o in out]

    async def _cat_file(
        self,
        url,
        **kwargs,
    ):
        fs = _resolve_fs(url, self.method)
        if fs.async_impl:
            return await fs._cat_file(url, **kwargs)
        else:
            return fs.cat_file(url, **kwargs)

    async def _pipe_file(
        self,
        path,
        value,
        **kwargs,
    ):
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        if fs.async_impl:
            return await fs._pipe_file(path, value, **kwargs)
        else:
            return fs.pipe_file(path, value, **kwargs)

    async def _rm(self, url, **kwargs):
        urls = url
        if isinstance(urls, str):
            urls = [urls]
        fs = _resolve_fs(urls[0], self.method)
        if fs.async_impl:
            await fs._rm(urls, **kwargs)
        else:
            fs.rm(url, **kwargs)

    async def _makedirs(self, path, exist_ok=False):
        logger.debug("Make dir %s", path)
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        if fs.async_impl:
            await fs._makedirs(path, exist_ok=exist_ok)
        else:
            fs.makedirs(path, exist_ok=exist_ok)

    def rsync(self, source, destination, **kwargs):
        """Sync files between two directory trees

        See `func:rsync` for more details.
        """
        rsync(source, destination, fs=self, **kwargs)

    async def _cp_file(
        self,
        url,
        url2,
        blocksize=2**20,
        callback=DEFAULT_CALLBACK,
        tempdir: Optional[str] = None,
        **kwargs,
    ):
        fs = _resolve_fs(url, self.method)
        fs2 = _resolve_fs(url2, self.method)
        if fs is fs2:
            # pure remote
            if fs.async_impl:
                return await fs._copy(url, url2, **kwargs)
            else:
                return fs.copy(url, url2, **kwargs)
        await copy_file_op(fs, [url], fs2, [url2], tempdir, 1, on_error="raise")

    async def _make_many_dirs(self, urls, exist_ok=True):
        fs = _resolve_fs(urls[0], self.method)
        if fs.async_impl:
            coros = [fs._makedirs(u, exist_ok=exist_ok) for u in urls]
            await _run_coros_in_chunks(coros)
        else:
            for u in urls:
                fs.makedirs(u, exist_ok=exist_ok)

    make_many_dirs = sync_wrapper(_make_many_dirs)

    async def _copy(
        self,
        path1: list[str],
        path2: list[str],
        recursive: bool = False,
        on_error: str = "ignore",
        maxdepth: Optional[int] = None,
        batch_size: Optional[int] = None,
        tempdir: Optional[str] = None,
        **kwargs,
    ):
        # TODO: special case for one FS being local, which can use get/put
        # TODO: special case for one being memFS, which can use cat/pipe
        if recursive:
            raise NotImplementedError("Please use fsspec.generic.rsync")
        path1 = [path1] if isinstance(path1, str) else path1
        path2 = [path2] if isinstance(path2, str) else path2

        fs = _resolve_fs(path1, self.method)
        fs2 = _resolve_fs(path2, self.method)

        if fs is fs2:
            if fs.async_impl:
                return await fs._copy(path1, path2, **kwargs)
            else:
                return fs.copy(path1, path2, **kwargs)

        await copy_file_op(
            fs, path1, fs2, path2, tempdir, batch_size, on_error=on_error
        )


async def copy_file_op(
    fs1, url1, fs2, url2, tempdir=None, batch_size=20, on_error="ignore"
):
    import tempfile

    tempdir = tempdir or tempfile.mkdtemp()
    try:
        coros = [
            _copy_file_op(
                fs1,
                u1,
                fs2,
                u2,
                os.path.join(tempdir, uuid.uuid4().hex),
            )
            for u1, u2 in zip(url1, url2)
        ]
        out = await _run_coros_in_chunks(
            coros, batch_size=batch_size, return_exceptions=True
        )
    finally:
        shutil.rmtree(tempdir)
    if on_error == "return":
        return out
    elif on_error == "raise":
        for o in out:
            if isinstance(o, Exception):
                raise o


async def _copy_file_op(fs1, url1, fs2, url2, local, on_error="ignore"):
    if fs1.async_impl:
        await fs1._get_file(url1, local)
    else:
        fs1.get_file(url1, local)
    if fs2.async_impl:
        await fs2._put_file(local, url2)
    else:
        fs2.put_file(local, url2)
    os.unlink(local)
    logger.debug("Copy %s -> %s; done", url1, url2)


async def maybe_await(cor):
    if inspect.iscoroutine(cor):
        return await cor
    else:
        return cor
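Usage note (not part of the vendored file): a minimal sketch of the generic interface above, assuming the "default" instantiation method; the protocols and paths are illustrative and exact behavior depends on the backends involved. `rsync` compares the "size" info field by default and returns the mapping of copies it performed.

# Sketch: one-way sync from a memory filesystem to local disk through
# GenericFileSystem. Illustrative only.
import fsspec
from fsspec.generic import GenericFileSystem, rsync

mem = fsspec.filesystem("memory")
mem.pipe_file("/src/a.txt", b"aaa")
mem.pipe_file("/src/sub/b.txt", b"bbb")

gfs = GenericFileSystem(default_method="default")
copied = rsync("memory://src", "file:///tmp/dest", fs=gfs)
print(copied)  # {source_path: destination_path} for files actually copied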
venv/lib/python3.12/site-packages/fsspec/gui.py
ADDED
@@ -0,0 +1,416 @@
import ast
import contextlib
import logging
import os
import re
from typing import ClassVar, Sequence

import panel as pn

from .core import OpenFile, get_filesystem_class, split_protocol
from .registry import known_implementations

pn.extension()
logger = logging.getLogger("fsspec.gui")


class SigSlot:
    """Signal-slot mixin, for Panel event passing

    Include this class in a widget manager's superclasses to be able to
    register events and callbacks on Panel widgets managed by that class.

    The method ``_register`` should be called as widgets are added, and external
    code should call ``connect`` to associate callbacks.

    By default, all signals emit a DEBUG logging statement.
    """

    # names of signals that this class may emit, each of which must be
    # set by _register for any new instance
    signals: ClassVar[Sequence[str]] = []
    # names of actions that this class may respond to, each of which
    # must be a method name
    slots: ClassVar[Sequence[str]] = []

    def __init__(self):
        self._ignoring_events = False
        self._sigs = {}
        self._map = {}
        self._setup()

    def _setup(self):
        """Create GUI elements and register signals"""
        self.panel = pn.pane.PaneBase()
        # no signals to set up in the base class

    def _register(
        self, widget, name, thing="value", log_level=logging.DEBUG, auto=False
    ):
        """Watch the given attribute of a widget and assign it a named event

        This is normally called at the time a widget is instantiated, in the
        class which owns it.

        Parameters
        ----------
        widget : pn.layout.Panel or None
            Widget to watch. If None, an anonymous signal not associated with
            any widget.
        name : str
            Name of this event
        thing : str
            Attribute of the given widget to watch
        log_level : int
            When the signal is triggered, a logging event of the given level
            will be fired in the dfviz logger.
        auto : bool
            If True, automatically connects with a method in this class of the
            same name.
        """
        if name not in self.signals:
            raise ValueError(f"Attempt to assign an undeclared signal: {name}")
        self._sigs[name] = {
            "widget": widget,
            "callbacks": [],
            "thing": thing,
            "log": log_level,
        }
        wn = "-".join(
            [
                getattr(widget, "name", str(widget)) if widget is not None else "none",
                thing,
            ]
        )
        self._map[wn] = name
        if widget is not None:
            widget.param.watch(self._signal, thing, onlychanged=True)
        if auto and hasattr(self, name):
            self.connect(name, getattr(self, name))

    def _repr_mimebundle_(self, *args, **kwargs):
        """Display in a notebook or a server"""
        try:
            return self.panel._repr_mimebundle_(*args, **kwargs)
        except (ValueError, AttributeError) as exc:
            raise NotImplementedError(
                "Panel does not seem to be set up properly"
            ) from exc

    def connect(self, signal, slot):
        """Associate callback with given event

        The callback must be a function which takes the "new" value of the
        watched attribute as the only parameter. If the callback returns False,
        this cancels any further processing of the given event.

        Alternatively, the callback can be a string, in which case it means
        emitting the correspondingly-named event (i.e., connect to self)
        """
        self._sigs[signal]["callbacks"].append(slot)

    def _signal(self, event):
        """This is called by an action on a widget

        Within a self.ignore_events context, nothing happens.

        Tests can execute this method by directly changing the values of
        widget components.
        """
        if not self._ignoring_events:
            wn = "-".join([event.obj.name, event.name])
            if wn in self._map and self._map[wn] in self._sigs:
                self._emit(self._map[wn], event.new)

    @contextlib.contextmanager
    def ignore_events(self):
        """Temporarily turn off events processing in this instance

        (does not propagate to children)
        """
        self._ignoring_events = True
        try:
            yield
        finally:
            self._ignoring_events = False

    def _emit(self, sig, value=None):
        """An event happened, call its callbacks

        This method can be used in tests to simulate message passing without
        directly changing visual elements.

        Calling of callbacks will halt whenever one returns False.
        """
        logger.log(self._sigs[sig]["log"], f"{sig}: {value}")
        for callback in self._sigs[sig]["callbacks"]:
            if isinstance(callback, str):
                self._emit(callback)
            else:
                try:
                    # running callbacks should not break the interface
                    ret = callback(value)
                    if ret is False:
                        break
                except Exception as e:
                    logger.exception(
                        "Exception (%s) while executing callback for signal: %s",
                        e,
                        sig,
                    )

    def show(self, threads=False):
        """Open a new browser tab and display this instance's interface"""
        self.panel.show(threads=threads, verbose=False)
        return self


class SingleSelect(SigSlot):
    """A multiselect which only allows you to select one item for an event"""

    signals = ["_selected", "selected"]  # the first is internal
    slots = ["set_options", "set_selection", "add", "clear", "select"]

    def __init__(self, **kwargs):
        self.kwargs = kwargs
        super().__init__()

    def _setup(self):
        self.panel = pn.widgets.MultiSelect(**self.kwargs)
        self._register(self.panel, "_selected", "value")
        self._register(None, "selected")
        self.connect("_selected", self.select_one)

    def _signal(self, *args, **kwargs):
        super()._signal(*args, **kwargs)

    def select_one(self, *_):
        with self.ignore_events():
            val = [self.panel.value[-1]] if self.panel.value else []
            self.panel.value = val
        self._emit("selected", self.panel.value)

    def set_options(self, options):
        self.panel.options = options

    def clear(self):
        self.panel.options = []

    @property
    def value(self):
        return self.panel.value

    def set_selection(self, selection):
        self.panel.value = [selection]


class FileSelector(SigSlot):
    """Panel-based graphical file selector widget

    Instances of this widget are interactive and can be displayed in jupyter by having
    them as the output of a cell, or in a separate browser tab using ``.show()``.
    """

    signals = [
        "protocol_changed",
        "selection_changed",
        "directory_entered",
        "home_clicked",
        "up_clicked",
        "go_clicked",
        "filters_changed",
    ]
    slots = ["set_filters", "go_home"]

    def __init__(self, url=None, filters=None, ignore=None, kwargs=None):
        """

        Parameters
        ----------
        url : str (optional)
            Initial value of the URL to populate the dialog; should include protocol
        filters : list(str) (optional)
            File endings to include in the listings. If not included, all files are
            allowed. Does not affect directories.
            If given, the endings will appear as checkboxes in the interface
        ignore : list(str) (optional)
            Regex(s) of file basename patterns to ignore, e.g., "\\." for typical
            hidden files on posix
        kwargs : dict (optional)
            To pass to file system instance
        """
        if url:
            self.init_protocol, url = split_protocol(url)
        else:
            self.init_protocol, url = "file", os.getcwd()
        self.init_url = url
        self.init_kwargs = (kwargs if isinstance(kwargs, str) else str(kwargs)) or "{}"
        self.filters = filters
        self.ignore = [re.compile(i) for i in ignore or []]
        self._fs = None
        super().__init__()

    def _setup(self):
        self.url = pn.widgets.TextInput(
            name="url",
            value=self.init_url,
            align="end",
            sizing_mode="stretch_width",
            width_policy="max",
        )
        self.protocol = pn.widgets.Select(
            options=sorted(known_implementations),
            value=self.init_protocol,
            name="protocol",
            align="center",
        )
        self.kwargs = pn.widgets.TextInput(
            name="kwargs", value=self.init_kwargs, align="center"
        )
        self.go = pn.widgets.Button(name="⇨", align="end", width=45)
        self.main = SingleSelect(size=10)
        self.home = pn.widgets.Button(name="🏠", width=40, height=30, align="end")
        self.up = pn.widgets.Button(name="‹", width=30, height=30, align="end")

        self._register(self.protocol, "protocol_changed", auto=True)
        self._register(self.go, "go_clicked", "clicks", auto=True)
        self._register(self.up, "up_clicked", "clicks", auto=True)
        self._register(self.home, "home_clicked", "clicks", auto=True)
        self._register(None, "selection_changed")
        self.main.connect("selected", self.selection_changed)
        self._register(None, "directory_entered")
        self.prev_protocol = self.protocol.value
        self.prev_kwargs = self.storage_options

        self.filter_sel = pn.widgets.CheckBoxGroup(
            value=[], options=[], inline=False, align="end", width_policy="min"
        )
        self._register(self.filter_sel, "filters_changed", auto=True)

        self.panel = pn.Column(
            pn.Row(self.protocol, self.kwargs),
            pn.Row(self.home, self.up, self.url, self.go, self.filter_sel),
            self.main.panel,
        )
        self.set_filters(self.filters)
        self.go_clicked()

    def set_filters(self, filters=None):
        self.filters = filters
        if filters:
            self.filter_sel.options = filters
            self.filter_sel.value = filters
        else:
            self.filter_sel.options = []
            self.filter_sel.value = []

    @property
    def storage_options(self):
        """Value of the kwargs box as a dictionary"""
        return ast.literal_eval(self.kwargs.value) or {}

    @property
    def fs(self):
        """Current filesystem instance"""
        if self._fs is None:
            cls = get_filesystem_class(self.protocol.value)
            self._fs = cls(**self.storage_options)
        return self._fs

    @property
    def urlpath(self):
        """URL of currently selected item"""
        return (
            (f"{self.protocol.value}://{self.main.value[0]}")
            if self.main.value
            else None
        )

    def open_file(self, mode="rb", compression=None, encoding=None):
        """Create OpenFile instance for the currently selected item

        For example, in a notebook you might do something like

        .. code-block::

            [ ]: sel = FileSelector(); sel

            # user selects their file

            [ ]: with sel.open_file('rb') as f:
            ...      out = f.read()

        Parameters
        ----------
        mode: str (optional)
            Open mode for the file.
        compression: str (optional)
            If given, interact with the file as compressed. Set to 'infer' to guess
            compression from the file ending
        encoding: str (optional)
            If using text mode, use this encoding; defaults to UTF8.
        """
        if self.urlpath is None:
            raise ValueError("No file selected")
        return OpenFile(self.fs, self.urlpath, mode, compression, encoding)

    def filters_changed(self, values):
        self.filters = values
        self.go_clicked()

    def selection_changed(self, *_):
        if self.urlpath is None:
            return
        if self.fs.isdir(self.urlpath):
            self.url.value = self.fs._strip_protocol(self.urlpath)
            self.go_clicked()

    def go_clicked(self, *_):
        if (
            self.prev_protocol != self.protocol.value
            or self.prev_kwargs != self.storage_options
        ):
            self._fs = None  # causes fs to be recreated
            self.prev_protocol = self.protocol.value
            self.prev_kwargs = self.storage_options
        listing = sorted(
            self.fs.ls(self.url.value, detail=True), key=lambda x: x["name"]
        )
        listing = [
            l
            for l in listing
            if not any(i.match(l["name"].rsplit("/", 1)[-1]) for i in self.ignore)
        ]
        folders = {
            "📁 " + o["name"].rsplit("/", 1)[-1]: o["name"]
            for o in listing
            if o["type"] == "directory"
        }
        files = {
            "📄 " + o["name"].rsplit("/", 1)[-1]: o["name"]
            for o in listing
            if o["type"] == "file"
        }
        if self.filters:
            files = {
                k: v
                for k, v in files.items()
                if any(v.endswith(ext) for ext in self.filters)
            }
        self.main.set_options(dict(**folders, **files))

    def protocol_changed(self, *_):
        self._fs = None
        self.main.options = []
        self.url.value = ""

    def home_clicked(self, *_):
        self.protocol.value = self.init_protocol
        self.kwargs.value = self.init_kwargs
        self.url.value = self.init_url
        self.go_clicked()

    def up_clicked(self, *_):
        self.url.value = self.fs._parent(self.url.value)
        self.go_clicked()
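Usage note (not part of the vendored file): a minimal sketch of the selector widget above, assuming panel is installed and a Jupyter or Panel-server context; the starting URL and filter endings are illustrative only.

# Sketch: browse local files and read the chosen one. Illustrative only.
from fsspec.gui import FileSelector

sel = FileSelector("file:///tmp", filters=[".csv", ".txt"])
sel.show()  # opens a browser tab; in a notebook, `sel` as cell output works too

# After a file has been picked in the interface:
# with sel.open_file("rb") as f:
#     data = f.read()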