Akash6776 committed on
Commit d151e9e · verified · 1 Parent(s): 5c2009a

Add files using upload-large-folder tool

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +33 -0
  2. 17.BangBros.Halloween Special With A Threesome.mih16199.mp4 +3 -0
  3. 17.BangBros.Stalking Pussy.bbe16045.mp4 +3 -0
  4. 17.Clips4Sale.Cory's Super Heroine Adventures.Super Spy vs Super Villain.mp4 +3 -0
  5. 17.DevilsFilm.Mommy Likes To Watch 2.mp4 +3 -0
  6. 17.DigitalPlayground.When College Girls Attack.mp4 +3 -0
  7. 17.EvilAngel.Porn Pro Drills 20YO Rim Jobber Kenzie.mp4 +3 -0
  8. 17.GirlsWay.Biology Exam.mp4 +3 -0
  9. 17.LoveHerFeet.Multiplayer.mp4 +3 -0
  10. 17.Mofos.Spinner Sucks Cock for Fame.mp4 +3 -0
  11. 17.NewSensations.Sexy Little Sister Kenzie Wants To Do It.mp4 +3 -0
  12. 18.Brazzers.Selfies With The Dean.mp4 +3 -0
  13. 18.BurningAngel.Human Pinata.mp4 +3 -0
  14. 18.CherryPimps.Wild Girls Kenzie Reeves And Vina Sky LIVE.mp4 +3 -0
  15. 18.Down The Hatch 29.mp4 +3 -0
  16. 18.GirlsWay.I Love You Both.mp4 +3 -0
  17. 18.GirlsWay.I Love You Too.mp4 +3 -0
  18. 18.GirlsWay.Peer Pressure - The Sharing Sisters.mp4 +3 -0
  19. 18.LesbianX.Anal Gaping Slumber Party.mp4 +3 -0
  20. 18.MissaX.In Love With Daddy.mp4 +3 -0
  21. 18.Mofos.A Sneaky Threesome Situation.mp4 +3 -0
  22. 18.Mofos.Teen Tastes Horny Stepmom's Pussy.mp4 +3 -0
  23. 18.Nubiles-Porn.Spring Break Lake Powell 1.mp4 +3 -0
  24. 18.PervCity.Tiny Blonde Teen Kenzie Reeves Has A Big Orgasm.mp4 +3 -0
  25. 18.PornFidelity.Losing It.mp4 +3 -0
  26. 18.PropertySex.VixenX.Been a While.mp4 +3 -0
  27. 18.PureTaboo.Trailer Park Taboo 3.mp4 +3 -0
  28. 18.RealityKings.Social Proof.mp4 +3 -0
  29. 19.Analized.Is The Ultimate Teenage Anal Slut.mp4 +3 -0
  30. 19.ArchangelVideo.My Pussy Is Dripping.mp4 +3 -0
  31. 19.BlowPass.Kenzie Loves Cock.mp4 +3 -0
  32. 19.Deeper.Dare You.101231.mp4 +3 -0
  33. 19.DogFartNetwork.BlackMeatWhiteFeet.And Big Black Cock.mp4 +3 -0
  34. 19.ElegantAngel.It'S A Family Thing 3.mp4 +3 -0
  35. venv/lib/python3.12/site-packages/filelock-3.18.0.dist-info/licenses/LICENSE +24 -0
  36. venv/lib/python3.12/site-packages/fsspec/__init__.py +71 -0
  37. venv/lib/python3.12/site-packages/fsspec/_version.py +21 -0
  38. venv/lib/python3.12/site-packages/fsspec/archive.py +75 -0
  39. venv/lib/python3.12/site-packages/fsspec/asyn.py +1110 -0
  40. venv/lib/python3.12/site-packages/fsspec/caching.py +1005 -0
  41. venv/lib/python3.12/site-packages/fsspec/callbacks.py +324 -0
  42. venv/lib/python3.12/site-packages/fsspec/compression.py +175 -0
  43. venv/lib/python3.12/site-packages/fsspec/config.py +131 -0
  44. venv/lib/python3.12/site-packages/fsspec/conftest.py +55 -0
  45. venv/lib/python3.12/site-packages/fsspec/core.py +743 -0
  46. venv/lib/python3.12/site-packages/fsspec/dircache.py +98 -0
  47. venv/lib/python3.12/site-packages/fsspec/exceptions.py +18 -0
  48. venv/lib/python3.12/site-packages/fsspec/fuse.py +324 -0
  49. venv/lib/python3.12/site-packages/fsspec/generic.py +395 -0
  50. venv/lib/python3.12/site-packages/fsspec/gui.py +416 -0
.gitattributes CHANGED
@@ -103,3 +103,36 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
103
  18.Clips4Sale.Primal's[[:space:]]Taboo[[:space:]]Sex.The[[:space:]]Sister[[:space:]]That[[:space:]]Got[[:space:]]What[[:space:]]She[[:space:]]Wanted.mp4 filter=lfs diff=lfs merge=lfs -text
104
  18.POVD.Thanksgiving[[:space:]]Creampie.mp4 filter=lfs diff=lfs merge=lfs -text
105
  18.NewSensations.Kenzie[[:space:]]Waits[[:space:]]For[[:space:]]Daddy[[:space:]]To[[:space:]]Cum.mp4 filter=lfs diff=lfs merge=lfs -text
106
+ 18.Nubiles-Porn.Spring[[:space:]]Break[[:space:]]Lake[[:space:]]Powell[[:space:]]1.mp4 filter=lfs diff=lfs merge=lfs -text
107
+ 18.Down[[:space:]]The[[:space:]]Hatch[[:space:]]29.mp4 filter=lfs diff=lfs merge=lfs -text
108
+ 19.ElegantAngel.It'S[[:space:]]A[[:space:]]Family[[:space:]]Thing[[:space:]]3.mp4 filter=lfs diff=lfs merge=lfs -text
109
+ 19.BlowPass.Kenzie[[:space:]]Loves[[:space:]]Cock.mp4 filter=lfs diff=lfs merge=lfs -text
110
+ 18.MissaX.In[[:space:]]Love[[:space:]]With[[:space:]]Daddy.mp4 filter=lfs diff=lfs merge=lfs -text
111
+ 18.CherryPimps.Wild[[:space:]]Girls[[:space:]]Kenzie[[:space:]]Reeves[[:space:]]And[[:space:]]Vina[[:space:]]Sky[[:space:]]LIVE.mp4 filter=lfs diff=lfs merge=lfs -text
112
+ 17.EvilAngel.Porn[[:space:]]Pro[[:space:]]Drills[[:space:]]20YO[[:space:]]Rim[[:space:]]Jobber[[:space:]]Kenzie.mp4 filter=lfs diff=lfs merge=lfs -text
113
+ 18.GirlsWay.Peer[[:space:]]Pressure[[:space:]]-[[:space:]]The[[:space:]]Sharing[[:space:]]Sisters.mp4 filter=lfs diff=lfs merge=lfs -text
114
+ 19.ArchangelVideo.My[[:space:]]Pussy[[:space:]]Is[[:space:]]Dripping.mp4 filter=lfs diff=lfs merge=lfs -text
115
+ 18.Mofos.Teen[[:space:]]Tastes[[:space:]]Horny[[:space:]]Stepmom's[[:space:]]Pussy.mp4 filter=lfs diff=lfs merge=lfs -text
116
+ 19.Analized.Is[[:space:]]The[[:space:]]Ultimate[[:space:]]Teenage[[:space:]]Anal[[:space:]]Slut.mp4 filter=lfs diff=lfs merge=lfs -text
117
+ 17.GirlsWay.Biology[[:space:]]Exam.mp4 filter=lfs diff=lfs merge=lfs -text
118
+ 18.GirlsWay.I[[:space:]]Love[[:space:]]You[[:space:]]Too.mp4 filter=lfs diff=lfs merge=lfs -text
119
+ 18.Brazzers.Selfies[[:space:]]With[[:space:]]The[[:space:]]Dean.mp4 filter=lfs diff=lfs merge=lfs -text
120
+ 18.PornFidelity.Losing[[:space:]]It.mp4 filter=lfs diff=lfs merge=lfs -text
121
+ 18.PropertySex.VixenX.Been[[:space:]]a[[:space:]]While.mp4 filter=lfs diff=lfs merge=lfs -text
122
+ 18.RealityKings.Social[[:space:]]Proof.mp4 filter=lfs diff=lfs merge=lfs -text
123
+ 19.DogFartNetwork.BlackMeatWhiteFeet.And[[:space:]]Big[[:space:]]Black[[:space:]]Cock.mp4 filter=lfs diff=lfs merge=lfs -text
124
+ 17.NewSensations.Sexy[[:space:]]Little[[:space:]]Sister[[:space:]]Kenzie[[:space:]]Wants[[:space:]]To[[:space:]]Do[[:space:]]It.mp4 filter=lfs diff=lfs merge=lfs -text
125
+ 17.BangBros.Stalking[[:space:]]Pussy.bbe16045.mp4 filter=lfs diff=lfs merge=lfs -text
126
+ 18.BurningAngel.Human[[:space:]]Pinata.mp4 filter=lfs diff=lfs merge=lfs -text
127
+ 17.DigitalPlayground.When[[:space:]]College[[:space:]]Girls[[:space:]]Attack.mp4 filter=lfs diff=lfs merge=lfs -text
128
+ 17.BangBros.Halloween[[:space:]]Special[[:space:]]With[[:space:]]A[[:space:]]Threesome.mih16199.mp4 filter=lfs diff=lfs merge=lfs -text
129
+ 17.Clips4Sale.Cory's[[:space:]]Super[[:space:]]Heroine[[:space:]]Adventures.Super[[:space:]]Spy[[:space:]]vs[[:space:]]Super[[:space:]]Villain.mp4 filter=lfs diff=lfs merge=lfs -text
130
+ 17.DevilsFilm.Mommy[[:space:]]Likes[[:space:]]To[[:space:]]Watch[[:space:]]2.mp4 filter=lfs diff=lfs merge=lfs -text
131
+ 18.LesbianX.Anal[[:space:]]Gaping[[:space:]]Slumber[[:space:]]Party.mp4 filter=lfs diff=lfs merge=lfs -text
132
+ 17.LoveHerFeet.Multiplayer.mp4 filter=lfs diff=lfs merge=lfs -text
133
+ 18.PureTaboo.Trailer[[:space:]]Park[[:space:]]Taboo[[:space:]]3.mp4 filter=lfs diff=lfs merge=lfs -text
134
+ 18.PervCity.Tiny[[:space:]]Blonde[[:space:]]Teen[[:space:]]Kenzie[[:space:]]Reeves[[:space:]]Has[[:space:]]A[[:space:]]Big[[:space:]]Orgasm.mp4 filter=lfs diff=lfs merge=lfs -text
135
+ 18.GirlsWay.I[[:space:]]Love[[:space:]]You[[:space:]]Both.mp4 filter=lfs diff=lfs merge=lfs -text
136
+ 19.Deeper.Dare[[:space:]]You.101231.mp4 filter=lfs diff=lfs merge=lfs -text
137
+ 17.Mofos.Spinner[[:space:]]Sucks[[:space:]]Cock[[:space:]]for[[:space:]]Fame.mp4 filter=lfs diff=lfs merge=lfs -text
138
+ 18.Mofos.A[[:space:]]Sneaky[[:space:]]Threesome[[:space:]]Situation.mp4 filter=lfs diff=lfs merge=lfs -text
17.BangBros.Halloween Special With A Threesome.mih16199.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:884e879b96ec030cc8a14e1e7a32a9f65d39a85354ad2bb21d188ab04e278f5b
+ size 2422708123
17.BangBros.Stalking Pussy.bbe16045.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f3ea68e1a0c05b5018c53f2a326f7157bac8c8090d065bac6cc2ef6cf0ba19a
+ size 1934457211
17.Clips4Sale.Cory's Super Heroine Adventures.Super Spy vs Super Villain.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:43034cc231bc539dc50693ec5870ac2257d0b6a10b6ae17599217010be5f6fb9
+ size 870873298
17.DevilsFilm.Mommy Likes To Watch 2.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f7e298a4ac6d03b2a47ae94ff40e28c23b63971025a6b3ceafe2589c1df0b07e
+ size 648854235
17.DigitalPlayground.When College Girls Attack.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:36f85fa2a830fafa0f9c8db488c6ab9747667053d3fef0c386e94775a0fbac54
+ size 1377442258
17.EvilAngel.Porn Pro Drills 20YO Rim Jobber Kenzie.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b8b436c5e022e8202b69e9979f447148453d79d42a678f83945b7d23bc351df7
+ size 872243786
17.GirlsWay.Biology Exam.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c23a3b48e46e4d430fcb18bc8bf09eb89528b306ba3a6cf52b04755649feb210
+ size 1592606077
17.LoveHerFeet.Multiplayer.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8613c7f6f11351f522a5bf70620e551c052c4792ce570657c66280b28c867411
+ size 3093689738
17.Mofos.Spinner Sucks Cock for Fame.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6fc2687e483e90c4b6f4e4e54116c3c51d8e5191959ff42d58c195fabfff1231
+ size 3241790517
17.NewSensations.Sexy Little Sister Kenzie Wants To Do It.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c9bb7a31a09c770db451f771fc31bc2dfbe4a1a52a38732ba2d89af3173dd7ff
+ size 3461941399
18.Brazzers.Selfies With The Dean.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c49d7ad91b9dde0e1f7ecbbee569b8097576f3cc2309920fcb881b5c39b44281
+ size 1276373412
18.BurningAngel.Human Pinata.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a4885a26346a2c582ff867896d250cc98fe001bf4c45174a00173071256ecbf
+ size 1038731154
18.CherryPimps.Wild Girls Kenzie Reeves And Vina Sky LIVE.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d1cb8e43eb18a4dfc7af97ff4f6a863d75b407021068ecf3fab86eab1913742
+ size 2245370403
18.Down The Hatch 29.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2e7d0fea1b35b4b04cef496a03c9f23dcb6c0e3c6565cc5f1d59994fb8169d4e
+ size 392756777
18.GirlsWay.I Love You Both.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ca0736f08f5d81d6c6e66c992593094c159d798435cbbd692a3c1eaa51113837
+ size 1980038412
18.GirlsWay.I Love You Too.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:896db7f0a41b9c1d84d06aa92f5496bcc6f6e14a4e0376abeb340344f4e6c2d1
+ size 2152267815
18.GirlsWay.Peer Pressure - The Sharing Sisters.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d0547799d4e8c871884e338174dc002df3ec5946f54fe6353a1d07ffd1e17136
+ size 2978343703
18.LesbianX.Anal Gaping Slumber Party.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:33336790c8d0c09d9906bb51ae56f221d9225977573f3d592568a4acb6eb2b91
+ size 1370767697
18.MissaX.In Love With Daddy.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7168383cafc414bfbf65c11d51a2cfbd1cedf81d187007c29987e440093968dd
+ size 1567924147
18.Mofos.A Sneaky Threesome Situation.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d9ac5c78b37130309d6611d6df4a3a682b9be19def52c46ba8c6affc88b49ced
+ size 2553280872
18.Mofos.Teen Tastes Horny Stepmom's Pussy.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9d1164cdc046dc7460372352ed5926b3aea7de4f672887f3b18a624004155b7b
+ size 3301988676
18.Nubiles-Porn.Spring Break Lake Powell 1.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3c4a91615484fd7a480576e77ce80ec4bea136be0a8b972951064b3c9cf34b1f
+ size 1786930517
18.PervCity.Tiny Blonde Teen Kenzie Reeves Has A Big Orgasm.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:165f672238f896968ddddb4e96c28db32f373147b590fd8b3db67817b8371292
+ size 944538607
18.PornFidelity.Losing It.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c5e73aed9043d81b40c137869eeb68d81ab7a45d72df52b3809f4783a885eef5
+ size 1017159752
18.PropertySex.VixenX.Been a While.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:83995f5915320416fb2dea76cca22aa7b9c2cd53685bc8d7ddfc259a6c1097c1
+ size 3520856768
18.PureTaboo.Trailer Park Taboo 3.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b2d399deb76ce6f713f9a9f1af86701fc6777a5d628119a23666819e62dd7510
+ size 1237153374
18.RealityKings.Social Proof.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:009864a80f69aa32706b2277dde75504ed0eb4dfb56a05721a4bbcb21f957f13
+ size 3091314409
19.Analized.Is The Ultimate Teenage Anal Slut.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dc240588b4f60a7542ef89fa2a55a85d91aeb6c0afcb7fed8c5e94965652aecc
+ size 2621108271
19.ArchangelVideo.My Pussy Is Dripping.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b8f9393e755d2618e2bc21c8617a0ab223ebb78f7dbf7c642d9cdd479b5670fe
+ size 1916687999
19.BlowPass.Kenzie Loves Cock.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:62c78684762704b653ed11810e6d9522fe196d95fe576049ebc42fd3dd5cad21
+ size 2174866312
19.Deeper.Dare You.101231.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3564ff804379de6fda587031bdb9bf565f656fd08dec1b36b1081c12aafac10f
+ size 3373955368
19.DogFartNetwork.BlackMeatWhiteFeet.And Big Black Cock.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:21b54744365838fda8d13819af758b20a72cd105c8772aa3f169b17117bcf850
+ size 3124523333
19.ElegantAngel.It'S A Family Thing 3.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:10c8f94415bc763c56e16ca16862386c01aaa4fe597e4ed6a2b0a518594671bf
+ size 604433859
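
Every .mp4 entry in this commit is stored through Git LFS, so what actually lands in the repository is a three-line pointer file like the ones above: the spec version, a sha256 object ID, and the payload size in bytes. A minimal sketch of how such a pointer could be parsed and checked against a downloaded object follows; the helper names and file paths are illustrative, not part of any tool used in this commit.

import hashlib
from pathlib import Path


def parse_lfs_pointer(text):
    # Split the "key value" lines of a Git LFS pointer into a dict.
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }


def verify_object(pointer_path, object_path):
    # Compare a downloaded object against the oid/size recorded in its pointer.
    meta = parse_lfs_pointer(Path(pointer_path).read_text())
    data = Path(object_path).read_bytes()
    return len(data) == meta["size"] and hashlib.sha256(data).hexdigest() == meta["oid"]


# Hypothetical usage: verify_object("pointer.txt", "downloaded_object.bin")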
venv/lib/python3.12/site-packages/filelock-3.18.0.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,24 @@
+ This is free and unencumbered software released into the public domain.
+
+ Anyone is free to copy, modify, publish, use, compile, sell, or
+ distribute this software, either in source code form or as a compiled
+ binary, for any purpose, commercial or non-commercial, and by any
+ means.
+
+ In jurisdictions that recognize copyright laws, the author or authors
+ of this software dedicate any and all copyright interest in the
+ software to the public domain. We make this dedication for the benefit
+ of the public at large and to the detriment of our heirs and
+ successors. We intend this dedication to be an overt act of
+ relinquishment in perpetuity of all present and future rights to this
+ software under copyright law.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ OTHER DEALINGS IN THE SOFTWARE.
+
+ For more information, please refer to <http://unlicense.org>
venv/lib/python3.12/site-packages/fsspec/__init__.py ADDED
@@ -0,0 +1,71 @@
+ from . import caching
+ from ._version import __version__  # noqa: F401
+ from .callbacks import Callback
+ from .compression import available_compressions
+ from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs
+ from .exceptions import FSTimeoutError
+ from .mapping import FSMap, get_mapper
+ from .registry import (
+     available_protocols,
+     filesystem,
+     get_filesystem_class,
+     register_implementation,
+     registry,
+ )
+ from .spec import AbstractFileSystem
+
+ __all__ = [
+     "AbstractFileSystem",
+     "FSTimeoutError",
+     "FSMap",
+     "filesystem",
+     "register_implementation",
+     "get_filesystem_class",
+     "get_fs_token_paths",
+     "get_mapper",
+     "open",
+     "open_files",
+     "open_local",
+     "registry",
+     "caching",
+     "Callback",
+     "available_protocols",
+     "available_compressions",
+     "url_to_fs",
+ ]
+
+
+ def process_entries():
+     try:
+         from importlib.metadata import entry_points
+     except ImportError:
+         return
+     if entry_points is not None:
+         try:
+             eps = entry_points()
+         except TypeError:
+             pass  # importlib-metadata < 0.8
+         else:
+             if hasattr(eps, "select"):  # Python 3.10+ / importlib_metadata >= 3.9.0
+                 specs = eps.select(group="fsspec.specs")
+             else:
+                 specs = eps.get("fsspec.specs", [])
+             registered_names = {}
+             for spec in specs:
+                 err_msg = f"Unable to load filesystem from {spec}"
+                 name = spec.name
+                 if name in registered_names:
+                     continue
+                 registered_names[name] = True
+                 register_implementation(
+                     name,
+                     spec.value.replace(":", "."),
+                     errtxt=err_msg,
+                     # We take our implementations as the ones to overload with if
+                     # for some reason we encounter some, may be the same, already
+                     # registered
+                     clobber=True,
+                 )
+
+
+ process_entries()
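
The __init__.py above mostly re-exports fsspec's public API (filesystem, open, get_mapper, ...) and registers any third-party backends advertised through the "fsspec.specs" entry-point group via process_entries(). A short, self-contained usage sketch of that public surface against the local filesystem; the /tmp path is only an example:

import fsspec

# Protocols currently registered, including any added through entry points.
print(fsspec.available_protocols())

# filesystem() returns a cached AbstractFileSystem instance for a protocol.
fs = fsspec.filesystem("file")

# open() is the generic file opener; here it writes a small local file.
with fsspec.open("/tmp/fsspec_demo.txt", "wt") as f:  # example path
    f.write("hello")

print(fs.cat_file("/tmp/fsspec_demo.txt"))  # b'hello'

# get_mapper() exposes a directory as a key/value mapping.
m = fsspec.get_mapper("file:///tmp")
print("fsspec_demo.txt" in m)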
venv/lib/python3.12/site-packages/fsspec/_version.py ADDED
@@ -0,0 +1,21 @@
+ # file generated by setuptools-scm
+ # don't change, don't track in version control
+
+ __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
+
+ TYPE_CHECKING = False
+ if TYPE_CHECKING:
+     from typing import Tuple
+     from typing import Union
+
+     VERSION_TUPLE = Tuple[Union[int, str], ...]
+ else:
+     VERSION_TUPLE = object
+
+ version: str
+ __version__: str
+ __version_tuple__: VERSION_TUPLE
+ version_tuple: VERSION_TUPLE
+
+ __version__ = version = '2025.5.1'
+ __version_tuple__ = version_tuple = (2025, 5, 1)
venv/lib/python3.12/site-packages/fsspec/archive.py ADDED
@@ -0,0 +1,75 @@
+ import operator
+
+ from fsspec import AbstractFileSystem
+ from fsspec.utils import tokenize
+
+
+ class AbstractArchiveFileSystem(AbstractFileSystem):
+     """
+     A generic superclass for implementing Archive-based filesystems.
+
+     Currently, it is shared amongst
+     :class:`~fsspec.implementations.zip.ZipFileSystem`,
+     :class:`~fsspec.implementations.libarchive.LibArchiveFileSystem` and
+     :class:`~fsspec.implementations.tar.TarFileSystem`.
+     """
+
+     def __str__(self):
+         return f"<Archive-like object {type(self).__name__} at {id(self)}>"
+
+     __repr__ = __str__
+
+     def ukey(self, path):
+         return tokenize(path, self.fo, self.protocol)
+
+     def _all_dirnames(self, paths):
+         """Returns *all* directory names for each path in paths, including intermediate
+         ones.
+
+         Parameters
+         ----------
+         paths: Iterable of path strings
+         """
+         if len(paths) == 0:
+             return set()
+
+         dirnames = {self._parent(path) for path in paths} - {self.root_marker}
+         return dirnames | self._all_dirnames(dirnames)
+
+     def info(self, path, **kwargs):
+         self._get_dirs()
+         path = self._strip_protocol(path)
+         if path in {"", "/"} and self.dir_cache:
+             return {"name": "", "type": "directory", "size": 0}
+         if path in self.dir_cache:
+             return self.dir_cache[path]
+         elif path + "/" in self.dir_cache:
+             return self.dir_cache[path + "/"]
+         else:
+             raise FileNotFoundError(path)
+
+     def ls(self, path, detail=True, **kwargs):
+         self._get_dirs()
+         paths = {}
+         for p, f in self.dir_cache.items():
+             p = p.rstrip("/")
+             if "/" in p:
+                 root = p.rsplit("/", 1)[0]
+             else:
+                 root = ""
+             if root == path.rstrip("/"):
+                 paths[p] = f
+             elif all(
+                 (a == b)
+                 for a, b in zip(path.split("/"), [""] + p.strip("/").split("/"))
+             ):
+                 # root directory entry
+                 ppath = p.rstrip("/").split("/", 1)[0]
+                 if ppath not in paths:
+                     out = {"name": ppath, "size": 0, "type": "directory"}
+                     paths[ppath] = out
+         if detail:
+             out = sorted(paths.values(), key=operator.itemgetter("name"))
+             return out
+         else:
+             return sorted(paths)
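
AbstractArchiveFileSystem is the read-only base shared by the zip, tar and libarchive backends: a subclass fills self.dir_cache via _get_dirs(), and the info()/ls() methods above answer queries purely from that cache. A small sketch of how the zip implementation is typically driven; example.zip is a throwaway archive created on the spot, not a file from this commit:

import zipfile

import fsspec

# Build a tiny archive so the example is self-contained.
with zipfile.ZipFile("example.zip", "w") as z:  # placeholder name
    z.writestr("data/a.txt", "first")
    z.writestr("data/b.txt", "second")

# ZipFileSystem inherits ls()/info() from AbstractArchiveFileSystem.
fs = fsspec.filesystem("zip", fo="example.zip")
print(fs.ls("data", detail=False))  # ['data/a.txt', 'data/b.txt']
print(fs.info("data/a.txt"))        # entry served from dir_cache
print(fs.cat_file("data/a.txt"))    # b'first'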
venv/lib/python3.12/site-packages/fsspec/asyn.py ADDED
@@ -0,0 +1,1110 @@
1
+ import asyncio
2
+ import asyncio.events
3
+ import functools
4
+ import inspect
5
+ import io
6
+ import numbers
7
+ import os
8
+ import re
9
+ import threading
10
+ from contextlib import contextmanager
11
+ from glob import has_magic
12
+ from typing import TYPE_CHECKING, Iterable
13
+
14
+ from .callbacks import DEFAULT_CALLBACK
15
+ from .exceptions import FSTimeoutError
16
+ from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
17
+ from .spec import AbstractBufferedFile, AbstractFileSystem
18
+ from .utils import glob_translate, is_exception, other_paths
19
+
20
+ private = re.compile("_[^_]")
21
+ iothread = [None] # dedicated fsspec IO thread
22
+ loop = [None] # global event loop for any non-async instance
23
+ _lock = None # global lock placeholder
24
+ get_running_loop = asyncio.get_running_loop
25
+
26
+
27
+ def get_lock():
28
+ """Allocate or return a threading lock.
29
+
30
+ The lock is allocated on first use to allow setting one lock per forked process.
31
+ """
32
+ global _lock
33
+ if not _lock:
34
+ _lock = threading.Lock()
35
+ return _lock
36
+
37
+
38
+ def reset_lock():
39
+ """Reset the global lock.
40
+
41
+ This should be called only on the init of a forked process to reset the lock to
42
+ None, enabling the new forked process to get a new lock.
43
+ """
44
+ global _lock
45
+
46
+ iothread[0] = None
47
+ loop[0] = None
48
+ _lock = None
49
+
50
+
51
+ async def _runner(event, coro, result, timeout=None):
52
+ timeout = timeout if timeout else None # convert 0 or 0.0 to None
53
+ if timeout is not None:
54
+ coro = asyncio.wait_for(coro, timeout=timeout)
55
+ try:
56
+ result[0] = await coro
57
+ except Exception as ex:
58
+ result[0] = ex
59
+ finally:
60
+ event.set()
61
+
62
+
63
+ def sync(loop, func, *args, timeout=None, **kwargs):
64
+ """
65
+ Make loop run coroutine until it returns. Runs in other thread
66
+
67
+ Examples
68
+ --------
69
+ >>> fsspec.asyn.sync(fsspec.asyn.get_loop(), func, *args,
70
+ timeout=timeout, **kwargs)
71
+ """
72
+ timeout = timeout if timeout else None # convert 0 or 0.0 to None
73
+ # NB: if the loop is not running *yet*, it is OK to submit work
74
+ # and we will wait for it
75
+ if loop is None or loop.is_closed():
76
+ raise RuntimeError("Loop is not running")
77
+ try:
78
+ loop0 = asyncio.events.get_running_loop()
79
+ if loop0 is loop:
80
+ raise NotImplementedError("Calling sync() from within a running loop")
81
+ except NotImplementedError:
82
+ raise
83
+ except RuntimeError:
84
+ pass
85
+ coro = func(*args, **kwargs)
86
+ result = [None]
87
+ event = threading.Event()
88
+ asyncio.run_coroutine_threadsafe(_runner(event, coro, result, timeout), loop)
89
+ while True:
90
+ # this loops allows thread to get interrupted
91
+ if event.wait(1):
92
+ break
93
+ if timeout is not None:
94
+ timeout -= 1
95
+ if timeout < 0:
96
+ raise FSTimeoutError
97
+
98
+ return_result = result[0]
99
+ if isinstance(return_result, asyncio.TimeoutError):
100
+ # suppress asyncio.TimeoutError, raise FSTimeoutError
101
+ raise FSTimeoutError from return_result
102
+ elif isinstance(return_result, BaseException):
103
+ raise return_result
104
+ else:
105
+ return return_result
106
+
107
+
108
+ def sync_wrapper(func, obj=None):
109
+ """Given a function, make so can be called in blocking contexts
110
+
111
+ Leave obj=None if defining within a class. Pass the instance if attaching
112
+ as an attribute of the instance.
113
+ """
114
+
115
+ @functools.wraps(func)
116
+ def wrapper(*args, **kwargs):
117
+ self = obj or args[0]
118
+ return sync(self.loop, func, *args, **kwargs)
119
+
120
+ return wrapper
121
+
122
+
123
+ @contextmanager
124
+ def _selector_policy():
125
+ original_policy = asyncio.get_event_loop_policy()
126
+ try:
127
+ if os.name == "nt" and hasattr(asyncio, "WindowsSelectorEventLoopPolicy"):
128
+ asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
129
+
130
+ yield
131
+ finally:
132
+ asyncio.set_event_loop_policy(original_policy)
133
+
134
+
135
+ def get_loop():
136
+ """Create or return the default fsspec IO loop
137
+
138
+ The loop will be running on a separate thread.
139
+ """
140
+ if loop[0] is None:
141
+ with get_lock():
142
+ # repeat the check just in case the loop got filled between the
143
+ # previous two calls from another thread
144
+ if loop[0] is None:
145
+ with _selector_policy():
146
+ loop[0] = asyncio.new_event_loop()
147
+ th = threading.Thread(target=loop[0].run_forever, name="fsspecIO")
148
+ th.daemon = True
149
+ th.start()
150
+ iothread[0] = th
151
+ return loop[0]
152
+
153
+
154
+ def reset_after_fork():
155
+ global lock
156
+ loop[0] = None
157
+ iothread[0] = None
158
+ lock = None
159
+
160
+
161
+ if hasattr(os, "register_at_fork"):
162
+ # should be posix; this will do nothing for spawn or forkserver subprocesses
163
+ os.register_at_fork(after_in_child=reset_after_fork)
164
+
165
+
166
+ if TYPE_CHECKING:
167
+ import resource
168
+
169
+ ResourceError = resource.error
170
+ else:
171
+ try:
172
+ import resource
173
+ except ImportError:
174
+ resource = None
175
+ ResourceError = OSError
176
+ else:
177
+ ResourceError = getattr(resource, "error", OSError)
178
+
179
+ _DEFAULT_BATCH_SIZE = 128
180
+ _NOFILES_DEFAULT_BATCH_SIZE = 1280
181
+
182
+
183
+ def _get_batch_size(nofiles=False):
184
+ from fsspec.config import conf
185
+
186
+ if nofiles:
187
+ if "nofiles_gather_batch_size" in conf:
188
+ return conf["nofiles_gather_batch_size"]
189
+ else:
190
+ if "gather_batch_size" in conf:
191
+ return conf["gather_batch_size"]
192
+ if nofiles:
193
+ return _NOFILES_DEFAULT_BATCH_SIZE
194
+ if resource is None:
195
+ return _DEFAULT_BATCH_SIZE
196
+
197
+ try:
198
+ soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
199
+ except (ImportError, ValueError, ResourceError):
200
+ return _DEFAULT_BATCH_SIZE
201
+
202
+ if soft_limit == resource.RLIM_INFINITY:
203
+ return -1
204
+ else:
205
+ return soft_limit // 8
206
+
207
+
208
+ def running_async() -> bool:
209
+ """Being executed by an event loop?"""
210
+ try:
211
+ asyncio.get_running_loop()
212
+ return True
213
+ except RuntimeError:
214
+ return False
215
+
216
+
217
+ async def _run_coros_in_chunks(
218
+ coros,
219
+ batch_size=None,
220
+ callback=DEFAULT_CALLBACK,
221
+ timeout=None,
222
+ return_exceptions=False,
223
+ nofiles=False,
224
+ ):
225
+ """Run the given coroutines in chunks.
226
+
227
+ Parameters
228
+ ----------
229
+ coros: list of coroutines to run
230
+ batch_size: int or None
231
+ Number of coroutines to submit/wait on simultaneously.
232
+ If -1, then it will not be any throttling. If
233
+ None, it will be inferred from _get_batch_size()
234
+ callback: fsspec.callbacks.Callback instance
235
+ Gets a relative_update when each coroutine completes
236
+ timeout: number or None
237
+ If given, each coroutine times out after this time. Note that, since
238
+ there are multiple batches, the total run time of this function will in
239
+ general be longer
240
+ return_exceptions: bool
241
+ Same meaning as in asyncio.gather
242
+ nofiles: bool
243
+ If inferring the batch_size, does this operation involve local files?
244
+ If yes, you normally expect smaller batches.
245
+ """
246
+
247
+ if batch_size is None:
248
+ batch_size = _get_batch_size(nofiles=nofiles)
249
+
250
+ if batch_size == -1:
251
+ batch_size = len(coros)
252
+
253
+ assert batch_size > 0
254
+
255
+ async def _run_coro(coro, i):
256
+ try:
257
+ return await asyncio.wait_for(coro, timeout=timeout), i
258
+ except Exception as e:
259
+ if not return_exceptions:
260
+ raise
261
+ return e, i
262
+ finally:
263
+ callback.relative_update(1)
264
+
265
+ i = 0
266
+ n = len(coros)
267
+ results = [None] * n
268
+ pending = set()
269
+
270
+ while pending or i < n:
271
+ while len(pending) < batch_size and i < n:
272
+ pending.add(asyncio.ensure_future(_run_coro(coros[i], i)))
273
+ i += 1
274
+
275
+ if not pending:
276
+ break
277
+
278
+ done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
279
+ while done:
280
+ result, k = await done.pop()
281
+ results[k] = result
282
+
283
+ return results
284
+
285
+
286
+ # these methods should be implemented as async by any async-able backend
287
+ async_methods = [
288
+ "_ls",
289
+ "_cat_file",
290
+ "_get_file",
291
+ "_put_file",
292
+ "_rm_file",
293
+ "_cp_file",
294
+ "_pipe_file",
295
+ "_expand_path",
296
+ "_info",
297
+ "_isfile",
298
+ "_isdir",
299
+ "_exists",
300
+ "_walk",
301
+ "_glob",
302
+ "_find",
303
+ "_du",
304
+ "_size",
305
+ "_mkdir",
306
+ "_makedirs",
307
+ ]
308
+
309
+
310
+ class AsyncFileSystem(AbstractFileSystem):
311
+ """Async file operations, default implementations
312
+
313
+ Passes bulk operations to asyncio.gather for concurrent operation.
314
+
315
+ Implementations that have concurrent batch operations and/or async methods
316
+ should inherit from this class instead of AbstractFileSystem. Docstrings are
317
+ copied from the un-underscored method in AbstractFileSystem, if not given.
318
+ """
319
+
320
+ # note that methods do not have docstring here; they will be copied
321
+ # for _* methods and inferred for overridden methods.
322
+
323
+ async_impl = True
324
+ mirror_sync_methods = True
325
+ disable_throttling = False
326
+
327
+ def __init__(self, *args, asynchronous=False, loop=None, batch_size=None, **kwargs):
328
+ self.asynchronous = asynchronous
329
+ self._pid = os.getpid()
330
+ if not asynchronous:
331
+ self._loop = loop or get_loop()
332
+ else:
333
+ self._loop = None
334
+ self.batch_size = batch_size
335
+ super().__init__(*args, **kwargs)
336
+
337
+ @property
338
+ def loop(self):
339
+ if self._pid != os.getpid():
340
+ raise RuntimeError("This class is not fork-safe")
341
+ return self._loop
342
+
343
+ async def _rm_file(self, path, **kwargs):
344
+ raise NotImplementedError
345
+
346
+ async def _rm(self, path, recursive=False, batch_size=None, **kwargs):
347
+ # TODO: implement on_error
348
+ batch_size = batch_size or self.batch_size
349
+ path = await self._expand_path(path, recursive=recursive)
350
+ return await _run_coros_in_chunks(
351
+ [self._rm_file(p, **kwargs) for p in reversed(path)],
352
+ batch_size=batch_size,
353
+ nofiles=True,
354
+ )
355
+
356
+ async def _cp_file(self, path1, path2, **kwargs):
357
+ raise NotImplementedError
358
+
359
+ async def _mv_file(self, path1, path2):
360
+ await self._cp_file(path1, path2)
361
+ await self._rm_file(path1)
362
+
363
+ async def _copy(
364
+ self,
365
+ path1,
366
+ path2,
367
+ recursive=False,
368
+ on_error=None,
369
+ maxdepth=None,
370
+ batch_size=None,
371
+ **kwargs,
372
+ ):
373
+ if on_error is None and recursive:
374
+ on_error = "ignore"
375
+ elif on_error is None:
376
+ on_error = "raise"
377
+
378
+ if isinstance(path1, list) and isinstance(path2, list):
379
+ # No need to expand paths when both source and destination
380
+ # are provided as lists
381
+ paths1 = path1
382
+ paths2 = path2
383
+ else:
384
+ source_is_str = isinstance(path1, str)
385
+ paths1 = await self._expand_path(
386
+ path1, maxdepth=maxdepth, recursive=recursive
387
+ )
388
+ if source_is_str and (not recursive or maxdepth is not None):
389
+ # Non-recursive glob does not copy directories
390
+ paths1 = [
391
+ p for p in paths1 if not (trailing_sep(p) or await self._isdir(p))
392
+ ]
393
+ if not paths1:
394
+ return
395
+
396
+ source_is_file = len(paths1) == 1
397
+ dest_is_dir = isinstance(path2, str) and (
398
+ trailing_sep(path2) or await self._isdir(path2)
399
+ )
400
+
401
+ exists = source_is_str and (
402
+ (has_magic(path1) and source_is_file)
403
+ or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
404
+ )
405
+ paths2 = other_paths(
406
+ paths1,
407
+ path2,
408
+ exists=exists,
409
+ flatten=not source_is_str,
410
+ )
411
+
412
+ batch_size = batch_size or self.batch_size
413
+ coros = [self._cp_file(p1, p2, **kwargs) for p1, p2 in zip(paths1, paths2)]
414
+ result = await _run_coros_in_chunks(
415
+ coros, batch_size=batch_size, return_exceptions=True, nofiles=True
416
+ )
417
+
418
+ for ex in filter(is_exception, result):
419
+ if on_error == "ignore" and isinstance(ex, FileNotFoundError):
420
+ continue
421
+ raise ex
422
+
423
+ async def _pipe_file(self, path, value, mode="overwrite", **kwargs):
424
+ raise NotImplementedError
425
+
426
+ async def _pipe(self, path, value=None, batch_size=None, **kwargs):
427
+ if isinstance(path, str):
428
+ path = {path: value}
429
+ batch_size = batch_size or self.batch_size
430
+ return await _run_coros_in_chunks(
431
+ [self._pipe_file(k, v, **kwargs) for k, v in path.items()],
432
+ batch_size=batch_size,
433
+ nofiles=True,
434
+ )
435
+
436
+ async def _process_limits(self, url, start, end):
437
+ """Helper for "Range"-based _cat_file"""
438
+ size = None
439
+ suff = False
440
+ if start is not None and start < 0:
441
+ # if start is negative and end None, end is the "suffix length"
442
+ if end is None:
443
+ end = -start
444
+ start = ""
445
+ suff = True
446
+ else:
447
+ size = size or (await self._info(url))["size"]
448
+ start = size + start
449
+ elif start is None:
450
+ start = 0
451
+ if not suff:
452
+ if end is not None and end < 0:
453
+ if start is not None:
454
+ size = size or (await self._info(url))["size"]
455
+ end = size + end
456
+ elif end is None:
457
+ end = ""
458
+ if isinstance(end, numbers.Integral):
459
+ end -= 1 # bytes range is inclusive
460
+ return f"bytes={start}-{end}"
461
+
462
+ async def _cat_file(self, path, start=None, end=None, **kwargs):
463
+ raise NotImplementedError
464
+
465
+ async def _cat(
466
+ self, path, recursive=False, on_error="raise", batch_size=None, **kwargs
467
+ ):
468
+ paths = await self._expand_path(path, recursive=recursive)
469
+ coros = [self._cat_file(path, **kwargs) for path in paths]
470
+ batch_size = batch_size or self.batch_size
471
+ out = await _run_coros_in_chunks(
472
+ coros, batch_size=batch_size, nofiles=True, return_exceptions=True
473
+ )
474
+ if on_error == "raise":
475
+ ex = next(filter(is_exception, out), False)
476
+ if ex:
477
+ raise ex
478
+ if (
479
+ len(paths) > 1
480
+ or isinstance(path, list)
481
+ or paths[0] != self._strip_protocol(path)
482
+ ):
483
+ return {
484
+ k: v
485
+ for k, v in zip(paths, out)
486
+ if on_error != "omit" or not is_exception(v)
487
+ }
488
+ else:
489
+ return out[0]
490
+
491
+ async def _cat_ranges(
492
+ self,
493
+ paths,
494
+ starts,
495
+ ends,
496
+ max_gap=None,
497
+ batch_size=None,
498
+ on_error="return",
499
+ **kwargs,
500
+ ):
501
+ """Get the contents of byte ranges from one or more files
502
+
503
+ Parameters
504
+ ----------
505
+ paths: list
506
+ A list of of filepaths on this filesystems
507
+ starts, ends: int or list
508
+ Bytes limits of the read. If using a single int, the same value will be
509
+ used to read all the specified files.
510
+ """
511
+ # TODO: on_error
512
+ if max_gap is not None:
513
+ # use utils.merge_offset_ranges
514
+ raise NotImplementedError
515
+ if not isinstance(paths, list):
516
+ raise TypeError
517
+ if not isinstance(starts, Iterable):
518
+ starts = [starts] * len(paths)
519
+ if not isinstance(ends, Iterable):
520
+ ends = [ends] * len(paths)
521
+ if len(starts) != len(paths) or len(ends) != len(paths):
522
+ raise ValueError
523
+ coros = [
524
+ self._cat_file(p, start=s, end=e, **kwargs)
525
+ for p, s, e in zip(paths, starts, ends)
526
+ ]
527
+ batch_size = batch_size or self.batch_size
528
+ return await _run_coros_in_chunks(
529
+ coros, batch_size=batch_size, nofiles=True, return_exceptions=True
530
+ )
531
+
532
+ async def _put_file(self, lpath, rpath, mode="overwrite", **kwargs):
533
+ raise NotImplementedError
534
+
535
+ async def _put(
536
+ self,
537
+ lpath,
538
+ rpath,
539
+ recursive=False,
540
+ callback=DEFAULT_CALLBACK,
541
+ batch_size=None,
542
+ maxdepth=None,
543
+ **kwargs,
544
+ ):
545
+ """Copy file(s) from local.
546
+
547
+ Copies a specific file or tree of files (if recursive=True). If rpath
548
+ ends with a "/", it will be assumed to be a directory, and target files
549
+ will go within.
550
+
551
+ The put_file method will be called concurrently on a batch of files. The
552
+ batch_size option can configure the amount of futures that can be executed
553
+ at the same time. If it is -1, then all the files will be uploaded concurrently.
554
+ The default can be set for this instance by passing "batch_size" in the
555
+ constructor, or for all instances by setting the "gather_batch_size" key
556
+ in ``fsspec.config.conf``, falling back to 1/8th of the system limit .
557
+ """
558
+ if isinstance(lpath, list) and isinstance(rpath, list):
559
+ # No need to expand paths when both source and destination
560
+ # are provided as lists
561
+ rpaths = rpath
562
+ lpaths = lpath
563
+ else:
564
+ source_is_str = isinstance(lpath, str)
565
+ if source_is_str:
566
+ lpath = make_path_posix(lpath)
567
+ fs = LocalFileSystem()
568
+ lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
569
+ if source_is_str and (not recursive or maxdepth is not None):
570
+ # Non-recursive glob does not copy directories
571
+ lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
572
+ if not lpaths:
573
+ return
574
+
575
+ source_is_file = len(lpaths) == 1
576
+ dest_is_dir = isinstance(rpath, str) and (
577
+ trailing_sep(rpath) or await self._isdir(rpath)
578
+ )
579
+
580
+ rpath = self._strip_protocol(rpath)
581
+ exists = source_is_str and (
582
+ (has_magic(lpath) and source_is_file)
583
+ or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
584
+ )
585
+ rpaths = other_paths(
586
+ lpaths,
587
+ rpath,
588
+ exists=exists,
589
+ flatten=not source_is_str,
590
+ )
591
+
592
+ is_dir = {l: os.path.isdir(l) for l in lpaths}
593
+ rdirs = [r for l, r in zip(lpaths, rpaths) if is_dir[l]]
594
+ file_pairs = [(l, r) for l, r in zip(lpaths, rpaths) if not is_dir[l]]
595
+
596
+ await asyncio.gather(*[self._makedirs(d, exist_ok=True) for d in rdirs])
597
+ batch_size = batch_size or self.batch_size
598
+
599
+ coros = []
600
+ callback.set_size(len(file_pairs))
601
+ for lfile, rfile in file_pairs:
602
+ put_file = callback.branch_coro(self._put_file)
603
+ coros.append(put_file(lfile, rfile, **kwargs))
604
+
605
+ return await _run_coros_in_chunks(
606
+ coros, batch_size=batch_size, callback=callback
607
+ )
608
+
609
+ async def _get_file(self, rpath, lpath, **kwargs):
610
+ raise NotImplementedError
611
+
612
+ async def _get(
613
+ self,
614
+ rpath,
615
+ lpath,
616
+ recursive=False,
617
+ callback=DEFAULT_CALLBACK,
618
+ maxdepth=None,
619
+ **kwargs,
620
+ ):
621
+ """Copy file(s) to local.
622
+
623
+ Copies a specific file or tree of files (if recursive=True). If lpath
624
+ ends with a "/", it will be assumed to be a directory, and target files
625
+ will go within. Can submit a list of paths, which may be glob-patterns
626
+ and will be expanded.
627
+
628
+ The get_file method will be called concurrently on a batch of files. The
629
+ batch_size option can configure the amount of futures that can be executed
630
+ at the same time. If it is -1, then all the files will be uploaded concurrently.
631
+ The default can be set for this instance by passing "batch_size" in the
632
+ constructor, or for all instances by setting the "gather_batch_size" key
633
+ in ``fsspec.config.conf``, falling back to 1/8th of the system limit .
634
+ """
635
+ if isinstance(lpath, list) and isinstance(rpath, list):
636
+ # No need to expand paths when both source and destination
637
+ # are provided as lists
638
+ rpaths = rpath
639
+ lpaths = lpath
640
+ else:
641
+ source_is_str = isinstance(rpath, str)
642
+ # First check for rpath trailing slash as _strip_protocol removes it.
643
+ source_not_trailing_sep = source_is_str and not trailing_sep(rpath)
644
+ rpath = self._strip_protocol(rpath)
645
+ rpaths = await self._expand_path(
646
+ rpath, recursive=recursive, maxdepth=maxdepth
647
+ )
648
+ if source_is_str and (not recursive or maxdepth is not None):
649
+ # Non-recursive glob does not copy directories
650
+ rpaths = [
651
+ p for p in rpaths if not (trailing_sep(p) or await self._isdir(p))
652
+ ]
653
+ if not rpaths:
654
+ return
655
+
656
+ lpath = make_path_posix(lpath)
657
+ source_is_file = len(rpaths) == 1
658
+ dest_is_dir = isinstance(lpath, str) and (
659
+ trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
660
+ )
661
+
662
+ exists = source_is_str and (
663
+ (has_magic(rpath) and source_is_file)
664
+ or (not has_magic(rpath) and dest_is_dir and source_not_trailing_sep)
665
+ )
666
+ lpaths = other_paths(
667
+ rpaths,
668
+ lpath,
669
+ exists=exists,
670
+ flatten=not source_is_str,
671
+ )
672
+
673
+ [os.makedirs(os.path.dirname(lp), exist_ok=True) for lp in lpaths]
674
+ batch_size = kwargs.pop("batch_size", self.batch_size)
675
+
676
+ coros = []
677
+ callback.set_size(len(lpaths))
678
+ for lpath, rpath in zip(lpaths, rpaths):
679
+ get_file = callback.branch_coro(self._get_file)
680
+ coros.append(get_file(rpath, lpath, **kwargs))
681
+ return await _run_coros_in_chunks(
682
+ coros, batch_size=batch_size, callback=callback
683
+ )
684
+
685
+ async def _isfile(self, path):
686
+ try:
687
+ return (await self._info(path))["type"] == "file"
688
+ except: # noqa: E722
689
+ return False
690
+
691
+ async def _isdir(self, path):
692
+ try:
693
+ return (await self._info(path))["type"] == "directory"
694
+ except OSError:
695
+ return False
696
+
697
+ async def _size(self, path):
698
+ return (await self._info(path)).get("size", None)
699
+
700
+ async def _sizes(self, paths, batch_size=None):
701
+ batch_size = batch_size or self.batch_size
702
+ return await _run_coros_in_chunks(
703
+ [self._size(p) for p in paths], batch_size=batch_size
704
+ )
705
+
706
+ async def _exists(self, path, **kwargs):
707
+ try:
708
+ await self._info(path, **kwargs)
709
+ return True
710
+ except FileNotFoundError:
711
+ return False
712
+
713
+ async def _info(self, path, **kwargs):
714
+ raise NotImplementedError
715
+
716
+ async def _ls(self, path, detail=True, **kwargs):
717
+ raise NotImplementedError
718
+
719
+ async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs):
720
+ if maxdepth is not None and maxdepth < 1:
721
+ raise ValueError("maxdepth must be at least 1")
722
+
723
+ path = self._strip_protocol(path)
724
+ full_dirs = {}
725
+ dirs = {}
726
+ files = {}
727
+
728
+ detail = kwargs.pop("detail", False)
729
+ try:
730
+ listing = await self._ls(path, detail=True, **kwargs)
731
+ except (FileNotFoundError, OSError) as e:
732
+ if on_error == "raise":
733
+ raise
734
+ elif callable(on_error):
735
+ on_error(e)
736
+ if detail:
737
+ yield path, {}, {}
738
+ else:
739
+ yield path, [], []
740
+ return
741
+
742
+ for info in listing:
743
+ # each info name must be at least [path]/part , but here
744
+ # we check also for names like [path]/part/
745
+ pathname = info["name"].rstrip("/")
746
+ name = pathname.rsplit("/", 1)[-1]
747
+ if info["type"] == "directory" and pathname != path:
748
+ # do not include "self" path
749
+ full_dirs[name] = pathname
750
+ dirs[name] = info
751
+ elif pathname == path:
752
+ # file-like with same name as give path
753
+ files[""] = info
754
+ else:
755
+ files[name] = info
756
+
757
+ if detail:
758
+ yield path, dirs, files
759
+ else:
760
+ yield path, list(dirs), list(files)
761
+
762
+ if maxdepth is not None:
763
+ maxdepth -= 1
764
+ if maxdepth < 1:
765
+ return
766
+
767
+ for d in dirs:
768
+ async for _ in self._walk(
769
+ full_dirs[d], maxdepth=maxdepth, detail=detail, **kwargs
770
+ ):
771
+ yield _
772
+
773
+ async def _glob(self, path, maxdepth=None, **kwargs):
774
+ if maxdepth is not None and maxdepth < 1:
775
+ raise ValueError("maxdepth must be at least 1")
776
+
777
+ import re
778
+
779
+ seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
780
+ ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
781
+ path = self._strip_protocol(path)
782
+ append_slash_to_dirname = ends_with_sep or path.endswith(
783
+ tuple(sep + "**" for sep in seps)
784
+ )
785
+ idx_star = path.find("*") if path.find("*") >= 0 else len(path)
786
+ idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
787
+ idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
788
+
789
+ min_idx = min(idx_star, idx_qmark, idx_brace)
790
+
791
+ detail = kwargs.pop("detail", False)
792
+
793
+ if not has_magic(path):
794
+ if await self._exists(path, **kwargs):
795
+ if not detail:
796
+ return [path]
797
+ else:
798
+ return {path: await self._info(path, **kwargs)}
799
+ else:
800
+ if not detail:
801
+ return [] # glob of non-existent returns empty
802
+ else:
803
+ return {}
804
+ elif "/" in path[:min_idx]:
805
+ min_idx = path[:min_idx].rindex("/")
806
+ root = path[: min_idx + 1]
807
+ depth = path[min_idx + 1 :].count("/") + 1
808
+ else:
809
+ root = ""
810
+ depth = path[min_idx + 1 :].count("/") + 1
811
+
812
+ if "**" in path:
813
+ if maxdepth is not None:
814
+ idx_double_stars = path.find("**")
815
+ depth_double_stars = path[idx_double_stars:].count("/") + 1
816
+ depth = depth - depth_double_stars + maxdepth
817
+ else:
818
+ depth = None
819
+
820
+ allpaths = await self._find(
821
+ root, maxdepth=depth, withdirs=True, detail=True, **kwargs
822
+ )
823
+
824
+ pattern = glob_translate(path + ("/" if ends_with_sep else ""))
825
+ pattern = re.compile(pattern)
826
+
827
+ out = {
828
+ p: info
829
+ for p, info in sorted(allpaths.items())
830
+ if pattern.match(
831
+ p + "/"
832
+ if append_slash_to_dirname and info["type"] == "directory"
833
+ else p
834
+ )
835
+ }
836
+
837
+ if detail:
838
+ return out
839
+ else:
840
+ return list(out)
841
+
842
+ async def _du(self, path, total=True, maxdepth=None, **kwargs):
843
+ sizes = {}
844
+ # async for?
845
+ for f in await self._find(path, maxdepth=maxdepth, **kwargs):
846
+ info = await self._info(f)
847
+ sizes[info["name"]] = info["size"]
848
+ if total:
849
+ return sum(sizes.values())
850
+ else:
851
+ return sizes
852
+
853
+ async def _find(self, path, maxdepth=None, withdirs=False, **kwargs):
854
+ path = self._strip_protocol(path)
855
+ out = {}
856
+ detail = kwargs.pop("detail", False)
857
+
858
+ # Add the root directory if withdirs is requested
859
+ # This is needed for posix glob compliance
860
+ if withdirs and path != "" and await self._isdir(path):
861
+ out[path] = await self._info(path)
862
+
863
+ # async for?
864
+ async for _, dirs, files in self._walk(path, maxdepth, detail=True, **kwargs):
865
+ if withdirs:
866
+ files.update(dirs)
867
+ out.update({info["name"]: info for name, info in files.items()})
868
+ if not out and (await self._isfile(path)):
869
+ # walk works on directories, but find should also return [path]
870
+ # when path happens to be a file
871
+ out[path] = {}
872
+ names = sorted(out)
873
+ if not detail:
874
+ return names
875
+ else:
876
+ return {name: out[name] for name in names}
877
+
878
+ async def _expand_path(self, path, recursive=False, maxdepth=None):
879
+ if maxdepth is not None and maxdepth < 1:
880
+ raise ValueError("maxdepth must be at least 1")
881
+
882
+ if isinstance(path, str):
883
+ out = await self._expand_path([path], recursive, maxdepth)
884
+ else:
885
+ out = set()
886
+ path = [self._strip_protocol(p) for p in path]
887
+ for p in path: # can gather here
888
+ if has_magic(p):
889
+ bit = set(await self._glob(p, maxdepth=maxdepth))
890
+ out |= bit
891
+ if recursive:
892
+ # glob call above expanded one depth so if maxdepth is defined
893
+ # then decrement it in expand_path call below. If it is zero
894
+ # after decrementing then avoid expand_path call.
895
+ if maxdepth is not None and maxdepth <= 1:
896
+ continue
897
+ out |= set(
898
+ await self._expand_path(
899
+ list(bit),
900
+ recursive=recursive,
901
+ maxdepth=maxdepth - 1 if maxdepth is not None else None,
902
+ )
903
+ )
904
+ continue
905
+ elif recursive:
906
+ rec = set(await self._find(p, maxdepth=maxdepth, withdirs=True))
907
+ out |= rec
908
+ if p not in out and (recursive is False or (await self._exists(p))):
909
+ # should only check once, for the root
910
+ out.add(p)
911
+ if not out:
912
+ raise FileNotFoundError(path)
913
+ return sorted(out)
914
+
915
+ async def _mkdir(self, path, create_parents=True, **kwargs):
916
+ pass # not necessary to implement, may not have directories
917
+
918
+ async def _makedirs(self, path, exist_ok=False):
919
+ pass # not necessary to implement, may not have directories
920
+
921
+ async def open_async(self, path, mode="rb", **kwargs):
922
+ if "b" not in mode or kwargs.get("compression"):
923
+ raise ValueError
924
+ raise NotImplementedError
925
+
926
+
927
+ def mirror_sync_methods(obj):
928
+ """Populate sync and async methods for obj
929
+
930
+ For each method will create a sync version if the name refers to an async method
931
+ (coroutine) and there is no override in the child class; will create an async
932
+ method for the corresponding sync method if there is no implementation.
933
+
934
+ Uses the methods specified in
935
+ - async_methods: the set that an implementation is expected to provide
936
+ - default_async_methods: that can be derived from their sync version in
937
+ AbstractFileSystem
938
+ - AsyncFileSystem: async-specific default coroutines
939
+ """
940
+ from fsspec import AbstractFileSystem
941
+
942
+ for method in async_methods + dir(AsyncFileSystem):
943
+ if not method.startswith("_"):
944
+ continue
945
+ smethod = method[1:]
946
+ if private.match(method):
947
+ isco = inspect.iscoroutinefunction(getattr(obj, method, None))
948
+ unsync = getattr(getattr(obj, smethod, False), "__func__", None)
949
+ is_default = unsync is getattr(AbstractFileSystem, smethod, "")
950
+ if isco and is_default:
951
+ mth = sync_wrapper(getattr(obj, method), obj=obj)
952
+ setattr(obj, smethod, mth)
953
+ if not mth.__doc__:
954
+ mth.__doc__ = getattr(
955
+ getattr(AbstractFileSystem, smethod, None), "__doc__", ""
956
+ )
957
+
958
+
959
+ class FSSpecCoroutineCancel(Exception):
960
+ pass
961
+
962
+
963
+ def _dump_running_tasks(
964
+ printout=True, cancel=True, exc=FSSpecCoroutineCancel, with_task=False
965
+ ):
966
+ import traceback
967
+
968
+ tasks = [t for t in asyncio.tasks.all_tasks(loop[0]) if not t.done()]
969
+ if printout:
970
+ [task.print_stack() for task in tasks]
971
+ out = [
972
+ {
973
+ "locals": task._coro.cr_frame.f_locals,
974
+ "file": task._coro.cr_frame.f_code.co_filename,
975
+ "firstline": task._coro.cr_frame.f_code.co_firstlineno,
976
+ "linelo": task._coro.cr_frame.f_lineno,
977
+ "stack": traceback.format_stack(task._coro.cr_frame),
978
+ "task": task if with_task else None,
979
+ }
980
+ for task in tasks
981
+ ]
982
+ if cancel:
983
+ for t in tasks:
984
+ cbs = t._callbacks
985
+ t.cancel()
986
+ asyncio.futures.Future.set_exception(t, exc)
987
+ asyncio.futures.Future.cancel(t)
988
+ [cb[0](t) for cb in cbs] # cancels any dependent concurrent.futures
989
+ try:
990
+ t._coro.throw(exc) # exits coro, unless explicitly handled
991
+ except exc:
992
+ pass
993
+ return out
994
+
995
+
996
+ class AbstractAsyncStreamedFile(AbstractBufferedFile):
997
+ # no read buffering, and always auto-commit
998
+ # TODO: readahead might still be useful here, but needs async version
999
+
1000
+ async def read(self, length=-1):
1001
+ """
1002
+ Return data from cache, or fetch pieces as necessary
1003
+
1004
+ Parameters
1005
+ ----------
1006
+ length: int (-1)
1007
+ Number of bytes to read; if <0, all remaining bytes.
1008
+ """
1009
+ length = -1 if length is None else int(length)
1010
+ if self.mode != "rb":
1011
+ raise ValueError("File not in read mode")
1012
+ if length < 0:
1013
+ length = self.size - self.loc
1014
+ if self.closed:
1015
+ raise ValueError("I/O operation on closed file.")
1016
+ if length == 0:
1017
+ # don't even bother calling fetch
1018
+ return b""
1019
+ out = await self._fetch_range(self.loc, self.loc + length)
1020
+ self.loc += len(out)
1021
+ return out
1022
+
1023
+ async def write(self, data):
1024
+ """
1025
+ Write data to buffer.
1026
+
1027
+ Buffer only sent on flush() or if buffer is greater than
1028
+ or equal to blocksize.
1029
+
1030
+ Parameters
1031
+ ----------
1032
+ data: bytes
1033
+ Set of bytes to be written.
1034
+ """
1035
+ if self.mode not in {"wb", "ab"}:
1036
+ raise ValueError("File not in write mode")
1037
+ if self.closed:
1038
+ raise ValueError("I/O operation on closed file.")
1039
+ if self.forced:
1040
+ raise ValueError("This file has been force-flushed, can only close")
1041
+ out = self.buffer.write(data)
1042
+ self.loc += out
1043
+ if self.buffer.tell() >= self.blocksize:
1044
+ await self.flush()
1045
+ return out
1046
+
1047
+ async def close(self):
1048
+ """Close file
1049
+
1050
+ Finalizes writes, discards cache
1051
+ """
1052
+ if getattr(self, "_unclosable", False):
1053
+ return
1054
+ if self.closed:
1055
+ return
1056
+ if self.mode == "rb":
1057
+ self.cache = None
1058
+ else:
1059
+ if not self.forced:
1060
+ await self.flush(force=True)
1061
+
1062
+ if self.fs is not None:
1063
+ self.fs.invalidate_cache(self.path)
1064
+ self.fs.invalidate_cache(self.fs._parent(self.path))
1065
+
1066
+ self.closed = True
1067
+
1068
+ async def flush(self, force=False):
1069
+ if self.closed:
1070
+ raise ValueError("Flush on closed file")
1071
+ if force and self.forced:
1072
+ raise ValueError("Force flush cannot be called more than once")
1073
+ if force:
1074
+ self.forced = True
1075
+
1076
+ if self.mode not in {"wb", "ab"}:
1077
+ # no-op to flush on read-mode
1078
+ return
1079
+
1080
+ if not force and self.buffer.tell() < self.blocksize:
1081
+ # Defer write on small block
1082
+ return
1083
+
1084
+ if self.offset is None:
1085
+ # Initialize a multipart upload
1086
+ self.offset = 0
1087
+ try:
1088
+ await self._initiate_upload()
1089
+ except:
1090
+ self.closed = True
1091
+ raise
1092
+
1093
+ if await self._upload_chunk(final=force) is not False:
1094
+ self.offset += self.buffer.seek(0, 2)
1095
+ self.buffer = io.BytesIO()
1096
+
1097
+ async def __aenter__(self):
1098
+ return self
1099
+
1100
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
1101
+ await self.close()
1102
+
1103
+ async def _fetch_range(self, start, end):
1104
+ raise NotImplementedError
1105
+
1106
+ async def _initiate_upload(self):
1107
+ pass
1108
+
1109
+ async def _upload_chunk(self, final=False):
1110
+ raise NotImplementedError
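For illustration only (not part of the uploaded file): a minimal sketch of how the `_initiate_upload`/`_upload_chunk` hooks above are driven by `flush()` and `close()`. The class `_ListAsyncFile` and the `demo` coroutine are made-up names; the sketch assumes the base `AbstractBufferedFile.__init__` accepts `fs=None` with an explicit write mode, the same pattern the `SnappyFile` class later in this diff relies on.

# Hypothetical sketch: a write-only AbstractAsyncStreamedFile that collects each
# uploaded chunk in a list, showing the flush()/_upload_chunk interaction above.
import asyncio

class _ListAsyncFile(AbstractAsyncStreamedFile):
    def __init__(self):
        # fs=None is fine for write mode; path and mode are required by the base class
        super().__init__(fs=None, path="demo", mode="wb")
        self.chunks = []

    async def _initiate_upload(self):
        # called once, on the first flush
        self.chunks.clear()

    async def _upload_chunk(self, final=False):
        # receives whatever is currently buffered
        self.buffer.seek(0)
        self.chunks.append(self.buffer.read())
        return True

async def demo():
    async with _ListAsyncFile() as f:
        await f.write(b"hello world")   # buffered; smaller than the blocksize
    return f.chunks                     # close() force-flushes the buffer

# asyncio.run(demo()) -> [b"hello world"]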
venv/lib/python3.12/site-packages/fsspec/caching.py ADDED
@@ -0,0 +1,1005 @@
1
+ from __future__ import annotations
2
+
3
+ import collections
4
+ import functools
5
+ import logging
6
+ import math
7
+ import os
8
+ import threading
9
+ import warnings
10
+ from concurrent.futures import Future, ThreadPoolExecutor
11
+ from itertools import groupby
12
+ from operator import itemgetter
13
+ from typing import (
14
+ TYPE_CHECKING,
15
+ Any,
16
+ Callable,
17
+ ClassVar,
18
+ Generic,
19
+ NamedTuple,
20
+ Optional,
21
+ OrderedDict,
22
+ TypeVar,
23
+ )
24
+
25
+ if TYPE_CHECKING:
26
+ import mmap
27
+
28
+ from typing_extensions import ParamSpec
29
+
30
+ P = ParamSpec("P")
31
+ else:
32
+ P = TypeVar("P")
33
+
34
+ T = TypeVar("T")
35
+
36
+
37
+ logger = logging.getLogger("fsspec")
38
+
39
+ Fetcher = Callable[[int, int], bytes] # Maps (start, end) to bytes
40
+ MultiFetcher = Callable[[list[tuple[int, int]]], bytes] # Maps [(start, end)] to bytes
41
+
42
+
43
+ class BaseCache:
44
+ """Pass-through cache: doesn't keep anything, calls every time
45
+
46
+ Acts as base class for other cachers
47
+
48
+ Parameters
49
+ ----------
50
+ blocksize: int
51
+ How far to read ahead in numbers of bytes
52
+ fetcher: func
53
+ Function of the form f(start, end) which gets bytes from remote as
54
+ specified
55
+ size: int
56
+ How big this file is
57
+ """
58
+
59
+ name: ClassVar[str] = "none"
60
+
61
+ def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
62
+ self.blocksize = blocksize
63
+ self.nblocks = 0
64
+ self.fetcher = fetcher
65
+ self.size = size
66
+ self.hit_count = 0
67
+ self.miss_count = 0
68
+ # the bytes that we actually requested
69
+ self.total_requested_bytes = 0
70
+
71
+ def _fetch(self, start: int | None, stop: int | None) -> bytes:
72
+ if start is None:
73
+ start = 0
74
+ if stop is None:
75
+ stop = self.size
76
+ if start >= self.size or start >= stop:
77
+ return b""
78
+ return self.fetcher(start, stop)
79
+
80
+ def _reset_stats(self) -> None:
81
+ """Reset hit and miss counts for a more granular report e.g. by file."""
82
+ self.hit_count = 0
83
+ self.miss_count = 0
84
+ self.total_requested_bytes = 0
85
+
86
+ def _log_stats(self) -> str:
87
+ """Return a formatted string of the cache statistics."""
88
+ if self.hit_count == 0 and self.miss_count == 0:
89
+ # a cache that does nothing, this is for logs only
90
+ return ""
91
+ return f" , {self.name}: {self.hit_count} hits, {self.miss_count} misses, {self.total_requested_bytes} total requested bytes"
92
+
93
+ def __repr__(self) -> str:
94
+ # TODO: use rich for better formatting
95
+ return f"""
96
+ <{self.__class__.__name__}:
97
+ block size : {self.blocksize}
98
+ block count : {self.nblocks}
99
+ file size : {self.size}
100
+ cache hits : {self.hit_count}
101
+ cache misses: {self.miss_count}
102
+ total requested bytes: {self.total_requested_bytes}>
103
+ """
104
+
105
+
106
+ class MMapCache(BaseCache):
107
+ """memory-mapped sparse file cache
108
+
109
+ Opens temporary file, which is filled blocks-wise when data is requested.
110
+ Ensure there is enough disc space in the temporary location.
111
+
112
+ This cache method might only work on posix
113
+
114
+ Parameters
115
+ ----------
116
+ blocksize: int
117
+ How far to read ahead in numbers of bytes
118
+ fetcher: Fetcher
119
+ Function of the form f(start, end) which gets bytes from remote as
120
+ specified
121
+ size: int
122
+ How big this file is
123
+ location: str
124
+ Where to create the temporary file. If None, a temporary file is
125
+ created using tempfile.TemporaryFile().
126
+ blocks: set[int]
127
+ Set of block numbers that have already been fetched. If None, an empty
128
+ set is created.
129
+ multi_fetcher: MultiFetcher
130
+ Function of the form f([(start, end)]) which gets bytes from remote
131
+ as specified. This function is used to fetch multiple blocks at once.
132
+ If not specified, the fetcher function is used instead.
133
+ """
134
+
135
+ name = "mmap"
136
+
137
+ def __init__(
138
+ self,
139
+ blocksize: int,
140
+ fetcher: Fetcher,
141
+ size: int,
142
+ location: str | None = None,
143
+ blocks: set[int] | None = None,
144
+ multi_fetcher: MultiFetcher | None = None,
145
+ ) -> None:
146
+ super().__init__(blocksize, fetcher, size)
147
+ self.blocks = set() if blocks is None else blocks
148
+ self.location = location
149
+ self.multi_fetcher = multi_fetcher
150
+ self.cache = self._makefile()
151
+
152
+ def _makefile(self) -> mmap.mmap | bytearray:
153
+ import mmap
154
+ import tempfile
155
+
156
+ if self.size == 0:
157
+ return bytearray()
158
+
159
+ # posix version
160
+ if self.location is None or not os.path.exists(self.location):
161
+ if self.location is None:
162
+ fd = tempfile.TemporaryFile()
163
+ self.blocks = set()
164
+ else:
165
+ fd = open(self.location, "wb+")
166
+ fd.seek(self.size - 1)
167
+ fd.write(b"1")
168
+ fd.flush()
169
+ else:
170
+ fd = open(self.location, "r+b")
171
+
172
+ return mmap.mmap(fd.fileno(), self.size)
173
+
174
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
175
+ logger.debug(f"MMap cache fetching {start}-{end}")
176
+ if start is None:
177
+ start = 0
178
+ if end is None:
179
+ end = self.size
180
+ if start >= self.size or start >= end:
181
+ return b""
182
+ start_block = start // self.blocksize
183
+ end_block = end // self.blocksize
184
+ block_range = range(start_block, end_block + 1)
185
+ # Determine which blocks need to be fetched. This sequence is sorted by construction.
186
+ need = (i for i in block_range if i not in self.blocks)
187
+ # Count the number of blocks already cached
188
+ self.hit_count += sum(1 for i in block_range if i in self.blocks)
189
+
190
+ ranges = []
191
+
192
+ # Consolidate needed blocks.
193
+ # Algorithm adapted from Python 2.x itertools documentation.
194
+ # We are grouping an enumerated sequence of blocks. By comparing when the difference
195
+ # between an ascending range (provided by enumerate) and the needed block numbers
196
+ # we can detect when the block number skips values. The key computes this difference.
197
+ # Whenever the difference changes, we know that we have previously cached block(s),
198
+ # and a new group is started. In other words, this algorithm neatly groups
199
+ # runs of consecutive block numbers so they can be fetched together.
200
+ for _, _blocks in groupby(enumerate(need), key=lambda x: x[0] - x[1]):
201
+ # Extract the blocks from the enumerated sequence
202
+ _blocks = tuple(map(itemgetter(1), _blocks))
203
+ # Compute start of first block
204
+ sstart = _blocks[0] * self.blocksize
205
+ # Compute the end of the last block. Last block may not be full size.
206
+ send = min(_blocks[-1] * self.blocksize + self.blocksize, self.size)
207
+
208
+ # Fetch bytes (could be multiple consecutive blocks)
209
+ self.total_requested_bytes += send - sstart
210
+ logger.debug(
211
+ f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})"
212
+ )
213
+ ranges.append((sstart, send))
214
+
215
+ # Update set of cached blocks
216
+ self.blocks.update(_blocks)
217
+ # Update cache statistics with number of blocks we had to cache
218
+ self.miss_count += len(_blocks)
219
+
220
+ if not ranges:
221
+ return self.cache[start:end]
222
+
223
+ if self.multi_fetcher:
224
+ logger.debug(f"MMap get blocks {ranges}")
225
+ for idx, r in enumerate(self.multi_fetcher(ranges)):
226
+ (sstart, send) = ranges[idx]
227
+ logger.debug(f"MMap copy block ({sstart}-{send})")
228
+ self.cache[sstart:send] = r
229
+ else:
230
+ for sstart, send in ranges:
231
+ logger.debug(f"MMap get block ({sstart}-{send})")
232
+ self.cache[sstart:send] = self.fetcher(sstart, send)
233
+
234
+ return self.cache[start:end]
235
+
236
+ def __getstate__(self) -> dict[str, Any]:
237
+ state = self.__dict__.copy()
238
+ # Remove the unpicklable entries.
239
+ del state["cache"]
240
+ return state
241
+
242
+ def __setstate__(self, state: dict[str, Any]) -> None:
243
+ # Restore instance attributes
244
+ self.__dict__.update(state)
245
+ self.cache = self._makefile()
246
+
247
+
248
+ class ReadAheadCache(BaseCache):
249
+ """Cache which reads only when we get beyond a block of data
250
+
251
+ This is a much simpler version of BytesCache, and does not attempt to
252
+ fill holes in the cache or keep fragments alive. It is best suited to
253
+ many small reads in a sequential order (e.g., reading lines from a file).
254
+ """
255
+
256
+ name = "readahead"
257
+
258
+ def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
259
+ super().__init__(blocksize, fetcher, size)
260
+ self.cache = b""
261
+ self.start = 0
262
+ self.end = 0
263
+
264
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
265
+ if start is None:
266
+ start = 0
267
+ if end is None or end > self.size:
268
+ end = self.size
269
+ if start >= self.size or start >= end:
270
+ return b""
271
+ l = end - start
272
+ if start >= self.start and end <= self.end:
273
+ # cache hit
274
+ self.hit_count += 1
275
+ return self.cache[start - self.start : end - self.start]
276
+ elif self.start <= start < self.end:
277
+ # partial hit
278
+ self.miss_count += 1
279
+ part = self.cache[start - self.start :]
280
+ l -= len(part)
281
+ start = self.end
282
+ else:
283
+ # miss
284
+ self.miss_count += 1
285
+ part = b""
286
+ end = min(self.size, end + self.blocksize)
287
+ self.total_requested_bytes += end - start
288
+ self.cache = self.fetcher(start, end) # new block replaces old
289
+ self.start = start
290
+ self.end = self.start + len(self.cache)
291
+ return part + self.cache[:l]
292
+
293
+
294
+ class FirstChunkCache(BaseCache):
295
+ """Caches the first block of a file only
296
+
297
+ This may be useful for file types where the metadata is stored in the header,
298
+ but is randomly accessed.
299
+ """
300
+
301
+ name = "first"
302
+
303
+ def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
304
+ if blocksize > size:
305
+ # this will buffer the whole thing
306
+ blocksize = size
307
+ super().__init__(blocksize, fetcher, size)
308
+ self.cache: bytes | None = None
309
+
310
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
311
+ start = start or 0
312
+ if start > self.size:
313
+ logger.debug("FirstChunkCache: requested start > file size")
314
+ return b""
315
+
316
+ end = min(end, self.size)
317
+
318
+ if start < self.blocksize:
319
+ if self.cache is None:
320
+ self.miss_count += 1
321
+ if end > self.blocksize:
322
+ self.total_requested_bytes += end
323
+ data = self.fetcher(0, end)
324
+ self.cache = data[: self.blocksize]
325
+ return data[start:]
326
+ self.cache = self.fetcher(0, self.blocksize)
327
+ self.total_requested_bytes += self.blocksize
328
+ part = self.cache[start:end]
329
+ if end > self.blocksize:
330
+ self.total_requested_bytes += end - self.blocksize
331
+ part += self.fetcher(self.blocksize, end)
332
+ self.hit_count += 1
333
+ return part
334
+ else:
335
+ self.miss_count += 1
336
+ self.total_requested_bytes += end - start
337
+ return self.fetcher(start, end)
338
+
339
+
340
+ class BlockCache(BaseCache):
341
+ """
342
+ Cache holding memory as a set of blocks.
343
+
344
+ Requests are only ever made ``blocksize`` at a time, and are
345
+ stored in an LRU cache. The least recently accessed block is
346
+ discarded when more than ``maxblocks`` are stored.
347
+
348
+ Parameters
349
+ ----------
350
+ blocksize : int
351
+ The number of bytes to store in each block.
352
+ Requests are only ever made for ``blocksize``, so this
353
+ should balance the overhead of making a request against
354
+ the granularity of the blocks.
355
+ fetcher : Callable
356
+ size : int
357
+ The total size of the file being cached.
358
+ maxblocks : int
359
+ The maximum number of blocks to cache for. The maximum memory
360
+ use for this cache is then ``blocksize * maxblocks``.
361
+ """
362
+
363
+ name = "blockcache"
364
+
365
+ def __init__(
366
+ self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
367
+ ) -> None:
368
+ super().__init__(blocksize, fetcher, size)
369
+ self.nblocks = math.ceil(size / blocksize)
370
+ self.maxblocks = maxblocks
371
+ self._fetch_block_cached = functools.lru_cache(maxblocks)(self._fetch_block)
372
+
373
+ def cache_info(self):
374
+ """
375
+ The statistics on the block cache.
376
+
377
+ Returns
378
+ -------
379
+ NamedTuple
380
+ Returned directly from the LRU Cache used internally.
381
+ """
382
+ return self._fetch_block_cached.cache_info()
383
+
384
+ def __getstate__(self) -> dict[str, Any]:
385
+ state = self.__dict__
386
+ del state["_fetch_block_cached"]
387
+ return state
388
+
389
+ def __setstate__(self, state: dict[str, Any]) -> None:
390
+ self.__dict__.update(state)
391
+ self._fetch_block_cached = functools.lru_cache(state["maxblocks"])(
392
+ self._fetch_block
393
+ )
394
+
395
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
396
+ if start is None:
397
+ start = 0
398
+ if end is None:
399
+ end = self.size
400
+ if start >= self.size or start >= end:
401
+ return b""
402
+
403
+ # byte position -> block numbers
404
+ start_block_number = start // self.blocksize
405
+ end_block_number = end // self.blocksize
406
+
407
+ # these are cached, so safe to do multiple calls for the same start and end.
408
+ for block_number in range(start_block_number, end_block_number + 1):
409
+ self._fetch_block_cached(block_number)
410
+
411
+ return self._read_cache(
412
+ start,
413
+ end,
414
+ start_block_number=start_block_number,
415
+ end_block_number=end_block_number,
416
+ )
417
+
418
+ def _fetch_block(self, block_number: int) -> bytes:
419
+ """
420
+ Fetch the block of data for `block_number`.
421
+ """
422
+ if block_number > self.nblocks:
423
+ raise ValueError(
424
+ f"'block_number={block_number}' is greater than "
425
+ f"the number of blocks ({self.nblocks})"
426
+ )
427
+
428
+ start = block_number * self.blocksize
429
+ end = start + self.blocksize
430
+ self.total_requested_bytes += end - start
431
+ self.miss_count += 1
432
+ logger.info("BlockCache fetching block %d", block_number)
433
+ block_contents = super()._fetch(start, end)
434
+ return block_contents
435
+
436
+ def _read_cache(
437
+ self, start: int, end: int, start_block_number: int, end_block_number: int
438
+ ) -> bytes:
439
+ """
440
+ Read from our block cache.
441
+
442
+ Parameters
443
+ ----------
444
+ start, end : int
445
+ The start and end byte positions.
446
+ start_block_number, end_block_number : int
447
+ The start and end block numbers.
448
+ """
449
+ start_pos = start % self.blocksize
450
+ end_pos = end % self.blocksize
451
+
452
+ self.hit_count += 1
453
+ if start_block_number == end_block_number:
454
+ block: bytes = self._fetch_block_cached(start_block_number)
455
+ return block[start_pos:end_pos]
456
+
457
+ else:
458
+ # read from the initial
459
+ out = [self._fetch_block_cached(start_block_number)[start_pos:]]
460
+
461
+ # intermediate blocks
462
+ # Note: it'd be nice to combine these into one big request. However
463
+ # that doesn't play nicely with our LRU cache.
464
+ out.extend(
465
+ map(
466
+ self._fetch_block_cached,
467
+ range(start_block_number + 1, end_block_number),
468
+ )
469
+ )
470
+
471
+ # final block
472
+ out.append(self._fetch_block_cached(end_block_number)[:end_pos])
473
+
474
+ return b"".join(out)
475
+
476
+
477
+ class BytesCache(BaseCache):
478
+ """Cache which holds data in an in-memory bytes object
479
+
480
+ Implements read-ahead by the block size, for semi-random reads progressing
481
+ through the file.
482
+
483
+ Parameters
484
+ ----------
485
+ trim: bool
486
+ As we read more data, whether to discard the start of the buffer when
487
+ we are more than a blocksize ahead of it.
488
+ """
489
+
490
+ name: ClassVar[str] = "bytes"
491
+
492
+ def __init__(
493
+ self, blocksize: int, fetcher: Fetcher, size: int, trim: bool = True
494
+ ) -> None:
495
+ super().__init__(blocksize, fetcher, size)
496
+ self.cache = b""
497
+ self.start: int | None = None
498
+ self.end: int | None = None
499
+ self.trim = trim
500
+
501
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
502
+ # TODO: only set start/end after fetch, in case it fails?
503
+ # is this where retry logic might go?
504
+ if start is None:
505
+ start = 0
506
+ if end is None:
507
+ end = self.size
508
+ if start >= self.size or start >= end:
509
+ return b""
510
+ if (
511
+ self.start is not None
512
+ and start >= self.start
513
+ and self.end is not None
514
+ and end < self.end
515
+ ):
516
+ # cache hit: we have all the required data
517
+ offset = start - self.start
518
+ self.hit_count += 1
519
+ return self.cache[offset : offset + end - start]
520
+
521
+ if self.blocksize:
522
+ bend = min(self.size, end + self.blocksize)
523
+ else:
524
+ bend = end
525
+
526
+ if bend == start or start > self.size:
527
+ return b""
528
+
529
+ if (self.start is None or start < self.start) and (
530
+ self.end is None or end > self.end
531
+ ):
532
+ # First read, or extending both before and after
533
+ self.total_requested_bytes += bend - start
534
+ self.miss_count += 1
535
+ self.cache = self.fetcher(start, bend)
536
+ self.start = start
537
+ else:
538
+ assert self.start is not None
539
+ assert self.end is not None
540
+ self.miss_count += 1
541
+
542
+ if start < self.start:
543
+ if self.end is None or self.end - end > self.blocksize:
544
+ self.total_requested_bytes += bend - start
545
+ self.cache = self.fetcher(start, bend)
546
+ self.start = start
547
+ else:
548
+ self.total_requested_bytes += self.start - start
549
+ new = self.fetcher(start, self.start)
550
+ self.start = start
551
+ self.cache = new + self.cache
552
+ elif self.end is not None and bend > self.end:
553
+ if self.end > self.size:
554
+ pass
555
+ elif end - self.end > self.blocksize:
556
+ self.total_requested_bytes += bend - start
557
+ self.cache = self.fetcher(start, bend)
558
+ self.start = start
559
+ else:
560
+ self.total_requested_bytes += bend - self.end
561
+ new = self.fetcher(self.end, bend)
562
+ self.cache = self.cache + new
563
+
564
+ self.end = self.start + len(self.cache)
565
+ offset = start - self.start
566
+ out = self.cache[offset : offset + end - start]
567
+ if self.trim:
568
+ num = (self.end - self.start) // (self.blocksize + 1)
569
+ if num > 1:
570
+ self.start += self.blocksize * num
571
+ self.cache = self.cache[self.blocksize * num :]
572
+ return out
573
+
574
+ def __len__(self) -> int:
575
+ return len(self.cache)
576
+
577
+
578
+ class AllBytes(BaseCache):
579
+ """Cache entire contents of the file"""
580
+
581
+ name: ClassVar[str] = "all"
582
+
583
+ def __init__(
584
+ self,
585
+ blocksize: int | None = None,
586
+ fetcher: Fetcher | None = None,
587
+ size: int | None = None,
588
+ data: bytes | None = None,
589
+ ) -> None:
590
+ super().__init__(blocksize, fetcher, size) # type: ignore[arg-type]
591
+ if data is None:
592
+ self.miss_count += 1
593
+ self.total_requested_bytes += self.size
594
+ data = self.fetcher(0, self.size)
595
+ self.data = data
596
+
597
+ def _fetch(self, start: int | None, stop: int | None) -> bytes:
598
+ self.hit_count += 1
599
+ return self.data[start:stop]
600
+
601
+
602
+ class KnownPartsOfAFile(BaseCache):
603
+ """
604
+ Cache holding known file parts.
605
+
606
+ Parameters
607
+ ----------
608
+ blocksize: int
609
+ How far to read ahead in numbers of bytes
610
+ fetcher: func
611
+ Function of the form f(start, end) which gets bytes from remote as
612
+ specified
613
+ size: int
614
+ How big this file is
615
+ data: dict
616
+ A dictionary mapping explicit `(start, stop)` file-offset tuples
617
+ with known bytes.
618
+ strict: bool, default True
619
+ Whether to fetch reads that go beyond a known byte-range boundary.
620
+ If `False`, any read that ends outside a known part will be zero
621
+ padded. Note that zero padding will not be used for reads that
622
+ begin outside a known byte-range.
623
+ """
624
+
625
+ name: ClassVar[str] = "parts"
626
+
627
+ def __init__(
628
+ self,
629
+ blocksize: int,
630
+ fetcher: Fetcher,
631
+ size: int,
632
+ data: Optional[dict[tuple[int, int], bytes]] = None,
633
+ strict: bool = True,
634
+ **_: Any,
635
+ ):
636
+ super().__init__(blocksize, fetcher, size)
637
+ self.strict = strict
638
+
639
+ # simple consolidation of contiguous blocks
640
+ if data:
641
+ old_offsets = sorted(data.keys())
642
+ offsets = [old_offsets[0]]
643
+ blocks = [data.pop(old_offsets[0])]
644
+ for start, stop in old_offsets[1:]:
645
+ start0, stop0 = offsets[-1]
646
+ if start == stop0:
647
+ offsets[-1] = (start0, stop)
648
+ blocks[-1] += data.pop((start, stop))
649
+ else:
650
+ offsets.append((start, stop))
651
+ blocks.append(data.pop((start, stop)))
652
+
653
+ self.data = dict(zip(offsets, blocks))
654
+ else:
655
+ self.data = {}
656
+
657
+ def _fetch(self, start: int | None, stop: int | None) -> bytes:
658
+ if start is None:
659
+ start = 0
660
+ if stop is None:
661
+ stop = self.size
662
+
663
+ out = b""
664
+ for (loc0, loc1), data in self.data.items():
665
+ # If self.strict=False, use zero-padded data
666
+ # for reads beyond the end of a "known" buffer
667
+ if loc0 <= start < loc1:
668
+ off = start - loc0
669
+ out = data[off : off + stop - start]
670
+ if not self.strict or loc0 <= stop <= loc1:
671
+ # The request is within a known range, or
672
+ # it begins within a known range, and we
673
+ # are allowed to pad reads beyond the
674
+ # buffer with zero
675
+ out += b"\x00" * (stop - start - len(out))
676
+ self.hit_count += 1
677
+ return out
678
+ else:
679
+ # The request ends outside a known range,
680
+ # and we are being "strict" about reads
681
+ # beyond the buffer
682
+ start = loc1
683
+ break
684
+
685
+ # We only get here if there is a request outside the
686
+ # known parts of the file. In an ideal world, this
687
+ # should never happen
688
+ if self.fetcher is None:
689
+ # We cannot fetch the data, so raise an error
690
+ raise ValueError(f"Read is outside the known file parts: {(start, stop)}. ")
691
+ # We can fetch the data, but should warn the user
692
+ # that this may be slow
693
+ warnings.warn(
694
+ f"Read is outside the known file parts: {(start, stop)}. "
695
+ f"IO/caching performance may be poor!"
696
+ )
697
+ logger.debug(f"KnownPartsOfAFile cache fetching {start}-{stop}")
698
+ self.total_requested_bytes += stop - start
699
+ self.miss_count += 1
700
+ return out + super()._fetch(start, stop)
701
+
702
+
703
+ class UpdatableLRU(Generic[P, T]):
704
+ """
705
+ Custom implementation of LRU cache that allows updating keys
706
+
707
+ Used by BackgroundBlockCache
708
+ """
709
+
710
+ class CacheInfo(NamedTuple):
711
+ hits: int
712
+ misses: int
713
+ maxsize: int
714
+ currsize: int
715
+
716
+ def __init__(self, func: Callable[P, T], max_size: int = 128) -> None:
717
+ self._cache: OrderedDict[Any, T] = collections.OrderedDict()
718
+ self._func = func
719
+ self._max_size = max_size
720
+ self._hits = 0
721
+ self._misses = 0
722
+ self._lock = threading.Lock()
723
+
724
+ def __call__(self, *args: P.args, **kwargs: P.kwargs) -> T:
725
+ if kwargs:
726
+ raise TypeError(f"Got unexpected keyword argument {kwargs.keys()}")
727
+ with self._lock:
728
+ if args in self._cache:
729
+ self._cache.move_to_end(args)
730
+ self._hits += 1
731
+ return self._cache[args]
732
+
733
+ result = self._func(*args, **kwargs)
734
+
735
+ with self._lock:
736
+ self._cache[args] = result
737
+ self._misses += 1
738
+ if len(self._cache) > self._max_size:
739
+ self._cache.popitem(last=False)
740
+
741
+ return result
742
+
743
+ def is_key_cached(self, *args: Any) -> bool:
744
+ with self._lock:
745
+ return args in self._cache
746
+
747
+ def add_key(self, result: T, *args: Any) -> None:
748
+ with self._lock:
749
+ self._cache[args] = result
750
+ if len(self._cache) > self._max_size:
751
+ self._cache.popitem(last=False)
752
+
753
+ def cache_info(self) -> UpdatableLRU.CacheInfo:
754
+ with self._lock:
755
+ return self.CacheInfo(
756
+ maxsize=self._max_size,
757
+ currsize=len(self._cache),
758
+ hits=self._hits,
759
+ misses=self._misses,
760
+ )
761
+
762
+
763
+ class BackgroundBlockCache(BaseCache):
764
+ """
765
+ Cache holding memory as a set of blocks with pre-loading of
766
+ the next block in the background.
767
+
768
+ Requests are only ever made ``blocksize`` at a time, and are
769
+ stored in an LRU cache. The least recently accessed block is
770
+ discarded when more than ``maxblocks`` are stored. If the
771
+ next block is not in cache, it is loaded in a separate thread
772
+ in a non-blocking way.
773
+
774
+ Parameters
775
+ ----------
776
+ blocksize : int
777
+ The number of bytes to store in each block.
778
+ Requests are only ever made for ``blocksize``, so this
779
+ should balance the overhead of making a request against
780
+ the granularity of the blocks.
781
+ fetcher : Callable
782
+ size : int
783
+ The total size of the file being cached.
784
+ maxblocks : int
785
+ The maximum number of blocks to cache for. The maximum memory
786
+ use for this cache is then ``blocksize * maxblocks``.
787
+ """
788
+
789
+ name: ClassVar[str] = "background"
790
+
791
+ def __init__(
792
+ self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
793
+ ) -> None:
794
+ super().__init__(blocksize, fetcher, size)
795
+ self.nblocks = math.ceil(size / blocksize)
796
+ self.maxblocks = maxblocks
797
+ self._fetch_block_cached = UpdatableLRU(self._fetch_block, maxblocks)
798
+
799
+ self._thread_executor = ThreadPoolExecutor(max_workers=1)
800
+ self._fetch_future_block_number: int | None = None
801
+ self._fetch_future: Future[bytes] | None = None
802
+ self._fetch_future_lock = threading.Lock()
803
+
804
+ def cache_info(self) -> UpdatableLRU.CacheInfo:
805
+ """
806
+ The statistics on the block cache.
807
+
808
+ Returns
809
+ -------
810
+ NamedTuple
811
+ Returned directly from the LRU Cache used internally.
812
+ """
813
+ return self._fetch_block_cached.cache_info()
814
+
815
+ def __getstate__(self) -> dict[str, Any]:
816
+ state = self.__dict__
817
+ del state["_fetch_block_cached"]
818
+ del state["_thread_executor"]
819
+ del state["_fetch_future_block_number"]
820
+ del state["_fetch_future"]
821
+ del state["_fetch_future_lock"]
822
+ return state
823
+
824
+ def __setstate__(self, state) -> None:
825
+ self.__dict__.update(state)
826
+ self._fetch_block_cached = UpdatableLRU(self._fetch_block, state["maxblocks"])
827
+ self._thread_executor = ThreadPoolExecutor(max_workers=1)
828
+ self._fetch_future_block_number = None
829
+ self._fetch_future = None
830
+ self._fetch_future_lock = threading.Lock()
831
+
832
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
833
+ if start is None:
834
+ start = 0
835
+ if end is None:
836
+ end = self.size
837
+ if start >= self.size or start >= end:
838
+ return b""
839
+
840
+ # byte position -> block numbers
841
+ start_block_number = start // self.blocksize
842
+ end_block_number = end // self.blocksize
843
+
844
+ fetch_future_block_number = None
845
+ fetch_future = None
846
+ with self._fetch_future_lock:
847
+ # Background thread is running. Check whether we can or must join it.
848
+ if self._fetch_future is not None:
849
+ assert self._fetch_future_block_number is not None
850
+ if self._fetch_future.done():
851
+ logger.info("BlockCache joined background fetch without waiting.")
852
+ self._fetch_block_cached.add_key(
853
+ self._fetch_future.result(), self._fetch_future_block_number
854
+ )
855
+ # Cleanup the fetch variables. Done with fetching the block.
856
+ self._fetch_future_block_number = None
857
+ self._fetch_future = None
858
+ else:
859
+ # Must join if we need the block for the current fetch
860
+ must_join = bool(
861
+ start_block_number
862
+ <= self._fetch_future_block_number
863
+ <= end_block_number
864
+ )
865
+ if must_join:
866
+ # Copy to the local variables to release lock
867
+ # before waiting for result
868
+ fetch_future_block_number = self._fetch_future_block_number
869
+ fetch_future = self._fetch_future
870
+
871
+ # Cleanup the fetch variables. Have a local copy.
872
+ self._fetch_future_block_number = None
873
+ self._fetch_future = None
874
+
875
+ # Need to wait for the future for the current read
876
+ if fetch_future is not None:
877
+ logger.info("BlockCache waiting for background fetch.")
878
+ # Wait until result and put it in cache
879
+ self._fetch_block_cached.add_key(
880
+ fetch_future.result(), fetch_future_block_number
881
+ )
882
+
883
+ # these are cached, so safe to do multiple calls for the same start and end.
884
+ for block_number in range(start_block_number, end_block_number + 1):
885
+ self._fetch_block_cached(block_number)
886
+
887
+ # fetch next block in the background if nothing is running in the background,
888
+ # the block is within file and it is not already cached
889
+ end_block_plus_1 = end_block_number + 1
890
+ with self._fetch_future_lock:
891
+ if (
892
+ self._fetch_future is None
893
+ and end_block_plus_1 <= self.nblocks
894
+ and not self._fetch_block_cached.is_key_cached(end_block_plus_1)
895
+ ):
896
+ self._fetch_future_block_number = end_block_plus_1
897
+ self._fetch_future = self._thread_executor.submit(
898
+ self._fetch_block, end_block_plus_1, "async"
899
+ )
900
+
901
+ return self._read_cache(
902
+ start,
903
+ end,
904
+ start_block_number=start_block_number,
905
+ end_block_number=end_block_number,
906
+ )
907
+
908
+ def _fetch_block(self, block_number: int, log_info: str = "sync") -> bytes:
909
+ """
910
+ Fetch the block of data for `block_number`.
911
+ """
912
+ if block_number > self.nblocks:
913
+ raise ValueError(
914
+ f"'block_number={block_number}' is greater than "
915
+ f"the number of blocks ({self.nblocks})"
916
+ )
917
+
918
+ start = block_number * self.blocksize
919
+ end = start + self.blocksize
920
+ logger.info("BlockCache fetching block (%s) %d", log_info, block_number)
921
+ self.total_requested_bytes += end - start
922
+ self.miss_count += 1
923
+ block_contents = super()._fetch(start, end)
924
+ return block_contents
925
+
926
+ def _read_cache(
927
+ self, start: int, end: int, start_block_number: int, end_block_number: int
928
+ ) -> bytes:
929
+ """
930
+ Read from our block cache.
931
+
932
+ Parameters
933
+ ----------
934
+ start, end : int
935
+ The start and end byte positions.
936
+ start_block_number, end_block_number : int
937
+ The start and end block numbers.
938
+ """
939
+ start_pos = start % self.blocksize
940
+ end_pos = end % self.blocksize
941
+
942
+ # kind of pointless to count this as a hit, but it is
943
+ self.hit_count += 1
944
+
945
+ if start_block_number == end_block_number:
946
+ block = self._fetch_block_cached(start_block_number)
947
+ return block[start_pos:end_pos]
948
+
949
+ else:
950
+ # read from the initial
951
+ out = [self._fetch_block_cached(start_block_number)[start_pos:]]
952
+
953
+ # intermediate blocks
954
+ # Note: it'd be nice to combine these into one big request. However
955
+ # that doesn't play nicely with our LRU cache.
956
+ out.extend(
957
+ map(
958
+ self._fetch_block_cached,
959
+ range(start_block_number + 1, end_block_number),
960
+ )
961
+ )
962
+
963
+ # final block
964
+ out.append(self._fetch_block_cached(end_block_number)[:end_pos])
965
+
966
+ return b"".join(out)
967
+
968
+
969
+ caches: dict[str | None, type[BaseCache]] = {
970
+ # one custom case
971
+ None: BaseCache,
972
+ }
973
+
974
+
975
+ def register_cache(cls: type[BaseCache], clobber: bool = False) -> None:
976
+ """'Register' cache implementation.
977
+
978
+ Parameters
979
+ ----------
980
+ clobber: bool, optional
981
+ If set to True (default is False) - allow to overwrite existing
982
+ entry.
983
+
984
+ Raises
985
+ ------
986
+ ValueError
987
+ """
988
+ name = cls.name
989
+ if not clobber and name in caches:
990
+ raise ValueError(f"Cache with name {name!r} is already known: {caches[name]}")
991
+ caches[name] = cls
992
+
993
+
994
+ for c in (
995
+ BaseCache,
996
+ MMapCache,
997
+ BytesCache,
998
+ ReadAheadCache,
999
+ BlockCache,
1000
+ FirstChunkCache,
1001
+ AllBytes,
1002
+ KnownPartsOfAFile,
1003
+ BackgroundBlockCache,
1004
+ ):
1005
+ register_cache(c)
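For illustration only (not part of the uploaded file): a small sketch exercising one of the registered cache classes directly with an in-memory fetcher. The names `data` and `fetcher` are hypothetical; only `BlockCache` and its methods shown above are assumed.

# Hypothetical sketch: driving BlockCache with a local bytes object as the "remote".
data = bytes(range(256)) * 16           # 4 KiB of predictable content

def fetcher(start, end):
    return data[start:end]

cache = BlockCache(blocksize=512, fetcher=fetcher, size=len(data), maxblocks=4)
assert cache._fetch(100, 900) == data[100:900]   # spans two blocks
print(cache.cache_info())                        # hits/misses from the internal lru_cache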
venv/lib/python3.12/site-packages/fsspec/callbacks.py ADDED
@@ -0,0 +1,324 @@
1
+ from functools import wraps
2
+
3
+
4
+ class Callback:
5
+ """
6
+ Base class and interface for callback mechanism
7
+
8
+ This class can be used directly for monitoring file transfers by
9
+ providing ``callback=Callback(hooks=...)`` (see the ``hooks`` argument,
10
+ below), or subclassed for more specialised behaviour.
11
+
12
+ Parameters
13
+ ----------
14
+ size: int (optional)
15
+ Nominal quantity for the value that corresponds to a complete
16
+ transfer, e.g., total number of tiles or total number of
17
+ bytes
18
+ value: int (0)
19
+ Starting internal counter value
20
+ hooks: dict or None
21
+ A dict of named functions to be called on each update. The signature
22
+ of these must be ``f(size, value, **kwargs)``
23
+ """
24
+
25
+ def __init__(self, size=None, value=0, hooks=None, **kwargs):
26
+ self.size = size
27
+ self.value = value
28
+ self.hooks = hooks or {}
29
+ self.kw = kwargs
30
+
31
+ def __enter__(self):
32
+ return self
33
+
34
+ def __exit__(self, *exc_args):
35
+ self.close()
36
+
37
+ def close(self):
38
+ """Close callback."""
39
+
40
+ def branched(self, path_1, path_2, **kwargs):
41
+ """
42
+ Return callback for child transfers
43
+
44
+ If this callback is operating at a higher level, e.g., put, which may
45
+ trigger transfers that can also be monitored. The function returns a callback
46
+ that has to be passed to the child method, e.g., put_file,
47
+ as `callback=` argument.
48
+
49
+ The implementation uses `callback.branch` for compatibility.
50
+ When implementing callbacks, it is recommended to override this function instead
51
+ of `branch` and avoid calling `super().branched(...)`.
52
+
53
+ Prefer using this function over `branch`.
54
+
55
+ Parameters
56
+ ----------
57
+ path_1: str
58
+ Child's source path
59
+ path_2: str
60
+ Child's destination path
61
+ **kwargs:
62
+ Arbitrary keyword arguments
63
+
64
+ Returns
65
+ -------
66
+ callback: Callback
67
+ A callback instance to be passed to the child method
68
+ """
69
+ self.branch(path_1, path_2, kwargs)
70
+ # mutate kwargs so that we can force the caller to pass "callback=" explicitly
71
+ return kwargs.pop("callback", DEFAULT_CALLBACK)
72
+
73
+ def branch_coro(self, fn):
74
+ """
75
+ Wraps a coroutine, and pass a new child callback to it.
76
+ """
77
+
78
+ @wraps(fn)
79
+ async def func(path1, path2: str, **kwargs):
80
+ with self.branched(path1, path2, **kwargs) as child:
81
+ return await fn(path1, path2, callback=child, **kwargs)
82
+
83
+ return func
84
+
85
+ def set_size(self, size):
86
+ """
87
+ Set the internal maximum size attribute
88
+
89
+ Usually called if not initially set at instantiation. Note that this
90
+ triggers a ``call()``.
91
+
92
+ Parameters
93
+ ----------
94
+ size: int
95
+ """
96
+ self.size = size
97
+ self.call()
98
+
99
+ def absolute_update(self, value):
100
+ """
101
+ Set the internal value state
102
+
103
+ Triggers ``call()``
104
+
105
+ Parameters
106
+ ----------
107
+ value: int
108
+ """
109
+ self.value = value
110
+ self.call()
111
+
112
+ def relative_update(self, inc=1):
113
+ """
114
+ Delta increment the internal counter
115
+
116
+ Triggers ``call()``
117
+
118
+ Parameters
119
+ ----------
120
+ inc: int
121
+ """
122
+ self.value += inc
123
+ self.call()
124
+
125
+ def call(self, hook_name=None, **kwargs):
126
+ """
127
+ Execute hook(s) with current state
128
+
129
+ Each function is passed the internal size and current value
130
+
131
+ Parameters
132
+ ----------
133
+ hook_name: str or None
134
+ If given, execute on this hook
135
+ kwargs: passed on to (all) hook(s)
136
+ """
137
+ if not self.hooks:
138
+ return
139
+ kw = self.kw.copy()
140
+ kw.update(kwargs)
141
+ if hook_name:
142
+ if hook_name not in self.hooks:
143
+ return
144
+ return self.hooks[hook_name](self.size, self.value, **kw)
145
+ for hook in self.hooks.values() or []:
146
+ hook(self.size, self.value, **kw)
147
+
148
+ def wrap(self, iterable):
149
+ """
150
+ Wrap an iterable to call ``relative_update`` on each iterations
151
+
152
+ Parameters
153
+ ----------
154
+ iterable: Iterable
155
+ The iterable that is being wrapped
156
+ """
157
+ for item in iterable:
158
+ self.relative_update()
159
+ yield item
160
+
161
+ def branch(self, path_1, path_2, kwargs):
162
+ """
163
+ Set callbacks for child transfers
164
+
165
+ If this callback is operating at a higher level, e.g., put, which may
166
+ trigger transfers that can also be monitored. The passed kwargs are
167
+ to be *mutated* to add ``callback=``, if this class supports branching
168
+ to children.
169
+
170
+ Parameters
171
+ ----------
172
+ path_1: str
173
+ Child's source path
174
+ path_2: str
175
+ Child's destination path
176
+ kwargs: dict
177
+ arguments passed to child method, e.g., put_file.
178
+
179
+ Returns
180
+ -------
181
+
182
+ """
183
+ return None
184
+
185
+ def no_op(self, *_, **__):
186
+ pass
187
+
188
+ def __getattr__(self, item):
189
+ """
190
+ If undefined methods are called on this class, nothing happens
191
+ """
192
+ return self.no_op
193
+
194
+ @classmethod
195
+ def as_callback(cls, maybe_callback=None):
196
+ """Transform callback=... into Callback instance
197
+
198
+ For the special value of ``None``, return the global instance of
199
+ ``NoOpCallback``. This is an alternative to including
200
+ ``callback=DEFAULT_CALLBACK`` directly in a method signature.
201
+ """
202
+ if maybe_callback is None:
203
+ return DEFAULT_CALLBACK
204
+ return maybe_callback
205
+
206
+
207
+ class NoOpCallback(Callback):
208
+ """
209
+ This implementation of Callback does exactly nothing
210
+ """
211
+
212
+ def call(self, *args, **kwargs):
213
+ return None
214
+
215
+
216
+ class DotPrinterCallback(Callback):
217
+ """
218
+ Simple example Callback implementation
219
+
220
+ Almost identical to Callback with a hook that prints a char; here we
221
+ demonstrate how the outer layer may print "#" and the inner layer "."
222
+ """
223
+
224
+ def __init__(self, chr_to_print="#", **kwargs):
225
+ self.chr = chr_to_print
226
+ super().__init__(**kwargs)
227
+
228
+ def branch(self, path_1, path_2, kwargs):
229
+ """Mutate kwargs to add new instance with different print char"""
230
+ kwargs["callback"] = DotPrinterCallback(".")
231
+
232
+ def call(self, **kwargs):
233
+ """Just outputs a character"""
234
+ print(self.chr, end="")
235
+
236
+
237
+ class TqdmCallback(Callback):
238
+ """
239
+ A callback to display a progress bar using tqdm
240
+
241
+ Parameters
242
+ ----------
243
+ tqdm_kwargs : dict, (optional)
244
+ Any argument accepted by the tqdm constructor.
245
+ See the `tqdm doc <https://tqdm.github.io/docs/tqdm/#__init__>`_.
246
+ Will be forwarded to `tqdm_cls`.
247
+ tqdm_cls: (optional)
248
+ subclass of `tqdm.tqdm`. If not passed, it will default to `tqdm.tqdm`.
249
+
250
+ Examples
251
+ --------
252
+ >>> import fsspec
253
+ >>> from fsspec.callbacks import TqdmCallback
254
+ >>> fs = fsspec.filesystem("memory")
255
+ >>> path2distant_data = "/your-path"
256
+ >>> fs.upload(
257
+ ".",
258
+ path2distant_data,
259
+ recursive=True,
260
+ callback=TqdmCallback(),
261
+ )
262
+
263
+ You can forward args to tqdm using the ``tqdm_kwargs`` parameter.
264
+
265
+ >>> fs.upload(
266
+ ".",
267
+ path2distant_data,
268
+ recursive=True,
269
+ callback=TqdmCallback(tqdm_kwargs={"desc": "Your tqdm description"}),
270
+ )
271
+
272
+ You can also customize the progress bar by passing a subclass of `tqdm`.
273
+
274
+ .. code-block:: python
275
+
276
+ class TqdmFormat(tqdm):
277
+ '''Provides a `total_time` format parameter'''
278
+ @property
279
+ def format_dict(self):
280
+ d = super().format_dict
281
+ total_time = d["elapsed"] * (d["total"] or 0) / max(d["n"], 1)
282
+ d.update(total_time=self.format_interval(total_time) + " in total")
283
+ return d
284
+
285
+ >>> with TqdmCallback(
286
+ tqdm_kwargs={
287
+ "desc": "desc",
288
+ "bar_format": "{total_time}: {percentage:.0f}%|{bar}{r_bar}",
289
+ },
290
+ tqdm_cls=TqdmFormat,
291
+ ) as callback:
292
+ fs.upload(".", path2distant_data, recursive=True, callback=callback)
293
+ """
294
+
295
+ def __init__(self, tqdm_kwargs=None, *args, **kwargs):
296
+ try:
297
+ from tqdm import tqdm
298
+
299
+ except ImportError as exce:
300
+ raise ImportError(
301
+ "Using TqdmCallback requires tqdm to be installed"
302
+ ) from exce
303
+
304
+ self._tqdm_cls = kwargs.pop("tqdm_cls", tqdm)
305
+ self._tqdm_kwargs = tqdm_kwargs or {}
306
+ self.tqdm = None
307
+ super().__init__(*args, **kwargs)
308
+
309
+ def call(self, *args, **kwargs):
310
+ if self.tqdm is None:
311
+ self.tqdm = self._tqdm_cls(total=self.size, **self._tqdm_kwargs)
312
+ self.tqdm.total = self.size
313
+ self.tqdm.update(self.value - self.tqdm.n)
314
+
315
+ def close(self):
316
+ if self.tqdm is not None:
317
+ self.tqdm.close()
318
+ self.tqdm = None
319
+
320
+ def __del__(self):
321
+ return self.close()
322
+
323
+
324
+ DEFAULT_CALLBACK = _DEFAULT_CALLBACK = NoOpCallback()
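For illustration only (not part of the uploaded file): a short sketch of the hook mechanism described in the `Callback` docstring above. The `seen` list and the lambda hook are hypothetical.

# Hypothetical sketch: each relative_update() triggered by wrap() invokes the hook
# with the current (size, value) pair.
seen = []
cb = Callback(size=3, hooks={"log": lambda size, value, **kw: seen.append((size, value))})
for _ in cb.wrap(["a", "b", "c"]):
    pass
# seen == [(3, 1), (3, 2), (3, 3)]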
venv/lib/python3.12/site-packages/fsspec/compression.py ADDED
@@ -0,0 +1,175 @@
1
+ """Helper functions for a standard streaming compression API"""
2
+
3
+ from zipfile import ZipFile
4
+
5
+ import fsspec.utils
6
+ from fsspec.spec import AbstractBufferedFile
7
+
8
+
9
+ def noop_file(file, mode, **kwargs):
10
+ return file
11
+
12
+
13
+ # TODO: files should also be available as contexts
14
+ # should be functions of the form func(infile, mode=, **kwargs) -> file-like
15
+ compr = {None: noop_file}
16
+
17
+
18
+ def register_compression(name, callback, extensions, force=False):
19
+ """Register an "inferable" file compression type.
20
+
21
+ Registers a transparent file compression type for use with fsspec.open.
22
+ Compression can be specified by name in open, or "infer"-ed for any files
23
+ ending with the given extensions.
24
+
25
+ Args:
26
+ name: (str) The compression type name. Eg. "gzip".
27
+ callback: A callable of form (infile, mode, **kwargs) -> file-like.
28
+ Accepts an input file-like object, the target mode and kwargs.
29
+ Returns a wrapped file-like object.
30
+ extensions: (str, Iterable[str]) A file extension, or list of file
31
+ extensions for which to infer this compression scheme. Eg. "gz".
32
+ force: (bool) Force re-registration of compression type or extensions.
33
+
34
+ Raises:
35
+ ValueError: If name or extensions already registered, and not force.
36
+
37
+ """
38
+ if isinstance(extensions, str):
39
+ extensions = [extensions]
40
+
41
+ # Validate registration
42
+ if name in compr and not force:
43
+ raise ValueError(f"Duplicate compression registration: {name}")
44
+
45
+ for ext in extensions:
46
+ if ext in fsspec.utils.compressions and not force:
47
+ raise ValueError(f"Duplicate compression file extension: {ext} ({name})")
48
+
49
+ compr[name] = callback
50
+
51
+ for ext in extensions:
52
+ fsspec.utils.compressions[ext] = name
53
+
54
+
55
+ def unzip(infile, mode="rb", filename=None, **kwargs):
56
+ if "r" not in mode:
57
+ filename = filename or "file"
58
+ z = ZipFile(infile, mode="w", **kwargs)
59
+ fo = z.open(filename, mode="w")
60
+ fo.close = lambda closer=fo.close: closer() or z.close()
61
+ return fo
62
+ z = ZipFile(infile)
63
+ if filename is None:
64
+ filename = z.namelist()[0]
65
+ return z.open(filename, mode="r", **kwargs)
66
+
67
+
68
+ register_compression("zip", unzip, "zip")
69
+
70
+ try:
71
+ from bz2 import BZ2File
72
+ except ImportError:
73
+ pass
74
+ else:
75
+ register_compression("bz2", BZ2File, "bz2")
76
+
77
+ try: # pragma: no cover
78
+ from isal import igzip
79
+
80
+ def isal(infile, mode="rb", **kwargs):
81
+ return igzip.IGzipFile(fileobj=infile, mode=mode, **kwargs)
82
+
83
+ register_compression("gzip", isal, "gz")
84
+ except ImportError:
85
+ from gzip import GzipFile
86
+
87
+ register_compression(
88
+ "gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), "gz"
89
+ )
90
+
91
+ try:
92
+ from lzma import LZMAFile
93
+
94
+ register_compression("lzma", LZMAFile, "lzma")
95
+ register_compression("xz", LZMAFile, "xz")
96
+ except ImportError:
97
+ pass
98
+
99
+ try:
100
+ import lzmaffi
101
+
102
+ register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True)
103
+ register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
104
+ except ImportError:
105
+ pass
106
+
107
+
108
+ class SnappyFile(AbstractBufferedFile):
109
+ def __init__(self, infile, mode, **kwargs):
110
+ import snappy
111
+
112
+ super().__init__(
113
+ fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs
114
+ )
115
+ self.infile = infile
116
+ if "r" in mode:
117
+ self.codec = snappy.StreamDecompressor()
118
+ else:
119
+ self.codec = snappy.StreamCompressor()
120
+
121
+ def _upload_chunk(self, final=False):
122
+ self.buffer.seek(0)
123
+ out = self.codec.add_chunk(self.buffer.read())
124
+ self.infile.write(out)
125
+ return True
126
+
127
+ def seek(self, loc, whence=0):
128
+ raise NotImplementedError("SnappyFile is not seekable")
129
+
130
+ def seekable(self):
131
+ return False
132
+
133
+ def _fetch_range(self, start, end):
134
+ """Get the specified set of bytes from remote"""
135
+ data = self.infile.read(end - start)
136
+ return self.codec.decompress(data)
137
+
138
+
139
+ try:
140
+ import snappy
141
+
142
+ snappy.compress(b"")
143
+ # Snappy may use the .sz file extension, but this is not part of the
144
+ # standard implementation.
145
+ register_compression("snappy", SnappyFile, [])
146
+
147
+ except (ImportError, NameError, AttributeError):
148
+ pass
149
+
150
+ try:
151
+ import lz4.frame
152
+
153
+ register_compression("lz4", lz4.frame.open, "lz4")
154
+ except ImportError:
155
+ pass
156
+
157
+ try:
158
+ import zstandard as zstd
159
+
160
+ def zstandard_file(infile, mode="rb"):
161
+ if "r" in mode:
162
+ cctx = zstd.ZstdDecompressor()
163
+ return cctx.stream_reader(infile)
164
+ else:
165
+ cctx = zstd.ZstdCompressor(level=10)
166
+ return cctx.stream_writer(infile)
167
+
168
+ register_compression("zstd", zstandard_file, "zst")
169
+ except ImportError:
170
+ pass
171
+
172
+
173
+ def available_compressions():
174
+ """Return a list of the implemented compressions."""
175
+ return list(compr)
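For illustration only (not part of the uploaded file): registering an extra, do-nothing compression so that files ending in a new extension are passed through unchanged. The name "identity" and the extension "id" are made up; only `register_compression` and `available_compressions` above are assumed.

# Hypothetical sketch: the callback just returns the file object untouched.
register_compression("identity", lambda f, mode="rb", **kwargs: f, "id")
assert "identity" in available_compressions()
assert fsspec.utils.compressions["id"] == "identity"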
venv/lib/python3.12/site-packages/fsspec/config.py ADDED
@@ -0,0 +1,131 @@
1
+ from __future__ import annotations
2
+
3
+ import configparser
4
+ import json
5
+ import os
6
+ import warnings
7
+ from typing import Any
8
+
9
+ conf: dict[str, dict[str, Any]] = {}
10
+ default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/fsspec")
11
+ conf_dir = os.environ.get("FSSPEC_CONFIG_DIR", default_conf_dir)
12
+
13
+
14
+ def set_conf_env(conf_dict, envdict=os.environ):
15
+ """Set config values from environment variables
16
+
17
+ Looks for variables of the form ``FSSPEC_<protocol>`` and
18
+ ``FSSPEC_<protocol>_<kwarg>``. For ``FSSPEC_<protocol>`` the value is parsed
19
+ as a json dictionary and used to ``update`` the config of the
20
+ corresponding protocol. For ``FSSPEC_<protocol>_<kwarg>`` there is no
21
+ attempt to convert the string value, but the kwarg keys will be lower-cased.
22
+
23
+ The ``FSSPEC_<protocol>_<kwarg>`` variables are applied after the
24
+ ``FSSPEC_<protocol>`` ones.
25
+
26
+ Parameters
27
+ ----------
28
+ conf_dict : dict(str, dict)
29
+ This dict will be mutated
30
+ envdict : dict-like(str, str)
31
+ Source for the values - usually the real environment
32
+ """
33
+ kwarg_keys = []
34
+ for key in envdict:
35
+ if key.startswith("FSSPEC_") and len(key) > 7 and key[7] != "_":
36
+ if key.count("_") > 1:
37
+ kwarg_keys.append(key)
38
+ continue
39
+ try:
40
+ value = json.loads(envdict[key])
41
+ except json.decoder.JSONDecodeError as ex:
42
+ warnings.warn(
43
+ f"Ignoring environment variable {key} due to a parse failure: {ex}"
44
+ )
45
+ else:
46
+ if isinstance(value, dict):
47
+ _, proto = key.split("_", 1)
48
+ conf_dict.setdefault(proto.lower(), {}).update(value)
49
+ else:
50
+ warnings.warn(
51
+ f"Ignoring environment variable {key} due to not being a dict:"
52
+ f" {type(value)}"
53
+ )
54
+ elif key.startswith("FSSPEC"):
55
+ warnings.warn(
56
+ f"Ignoring environment variable {key} due to having an unexpected name"
57
+ )
58
+
59
+ for key in kwarg_keys:
60
+ _, proto, kwarg = key.split("_", 2)
61
+ conf_dict.setdefault(proto.lower(), {})[kwarg.lower()] = envdict[key]
62
+
63
+
64
+ def set_conf_files(cdir, conf_dict):
65
+ """Set config values from files
66
+
67
+ Scans for INI and JSON files in the given dictionary, and uses their
68
+ contents to set the config. In case of repeated values, later values
69
+ win.
70
+
71
+ In the case of INI files, all values are strings, and these will not
72
+ be converted.
73
+
74
+ Parameters
75
+ ----------
76
+ cdir : str
77
+ Directory to search
78
+ conf_dict : dict(str, dict)
79
+ This dict will be mutated
80
+ """
81
+ if not os.path.isdir(cdir):
82
+ return
83
+ allfiles = sorted(os.listdir(cdir))
84
+ for fn in allfiles:
85
+ if fn.endswith(".ini"):
86
+ ini = configparser.ConfigParser()
87
+ ini.read(os.path.join(cdir, fn))
88
+ for key in ini:
89
+ if key == "DEFAULT":
90
+ continue
91
+ conf_dict.setdefault(key, {}).update(dict(ini[key]))
92
+ if fn.endswith(".json"):
93
+ with open(os.path.join(cdir, fn)) as f:
94
+ js = json.load(f)
95
+ for key in js:
96
+ conf_dict.setdefault(key, {}).update(dict(js[key]))
97
+
98
+
99
+ def apply_config(cls, kwargs, conf_dict=None):
100
+ """Supply default values for kwargs when instantiating class
101
+
102
+ Augments the passed kwargs, by finding entries in the config dict
103
+ which match the classes ``.protocol`` attribute (one or more str)
104
+
105
+ Parameters
106
+ ----------
107
+ cls : file system implementation
108
+ kwargs : dict
109
+ conf_dict : dict of dict
110
+ Typically this is the global configuration
111
+
112
+ Returns
113
+ -------
114
+ dict : the modified set of kwargs
115
+ """
116
+ if conf_dict is None:
117
+ conf_dict = conf
118
+ protos = cls.protocol if isinstance(cls.protocol, (tuple, list)) else [cls.protocol]
119
+ kw = {}
120
+ for proto in protos:
121
+ # default kwargs from the current state of the config
122
+ if proto in conf_dict:
123
+ kw.update(conf_dict[proto])
124
+ # explicit kwargs always win
125
+ kw.update(**kwargs)
126
+ kwargs = kw
127
+ return kwargs
128
+
129
+
130
+ set_conf_files(conf_dir, conf)
131
+ set_conf_env(conf)
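For illustration only (not part of the uploaded file): feeding a synthetic environment to `set_conf_env` to show how protocol-level JSON and per-kwarg variables combine. The protocol name `myproto` and its values are made up.

# Hypothetical sketch: FSSPEC_<proto> carries JSON, FSSPEC_<proto>_<kwarg> a raw string.
cfg = {}
set_conf_env(
    cfg,
    envdict={
        "FSSPEC_MYPROTO": '{"anon": true}',
        "FSSPEC_MYPROTO_TOKEN": "abc123",
    },
)
# cfg == {"myproto": {"anon": True, "token": "abc123"}}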
venv/lib/python3.12/site-packages/fsspec/conftest.py ADDED
@@ -0,0 +1,55 @@
+import os
+import shutil
+import subprocess
+import sys
+import time
+
+import pytest
+
+import fsspec
+from fsspec.implementations.cached import CachingFileSystem
+
+
+@pytest.fixture()
+def m():
+    """
+    Fixture providing a memory filesystem.
+    """
+    m = fsspec.filesystem("memory")
+    m.store.clear()
+    m.pseudo_dirs.clear()
+    m.pseudo_dirs.append("")
+    try:
+        yield m
+    finally:
+        m.store.clear()
+        m.pseudo_dirs.clear()
+        m.pseudo_dirs.append("")
+
+
+@pytest.fixture
+def ftp_writable(tmpdir):
+    """
+    Fixture providing a writable FTP filesystem.
+    """
+    pytest.importorskip("pyftpdlib")
+    from fsspec.implementations.ftp import FTPFileSystem
+
+    FTPFileSystem.clear_instance_cache()  # remove lingering connections
+    CachingFileSystem.clear_instance_cache()
+    d = str(tmpdir)
+    with open(os.path.join(d, "out"), "wb") as f:
+        f.write(b"hello" * 10000)
+    P = subprocess.Popen(
+        [sys.executable, "-m", "pyftpdlib", "-d", d, "-u", "user", "-P", "pass", "-w"]
+    )
+    try:
+        time.sleep(1)
+        yield "localhost", 2121, "user", "pass"
+    finally:
+        P.terminate()
+        P.wait()
+        try:
+            shutil.rmtree(tmpdir)
+        except Exception:
+            pass
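
These fixtures back fsspec's own test suite: ``m`` yields a freshly cleared in-memory filesystem and ``ftp_writable`` starts a throwaway pyftpdlib server. A sketch of a test that would receive the ``m`` fixture through this conftest; the module name and assertions are hypothetical:

# test_memory_example.py (hypothetical test module)
def test_memory_roundtrip(m):
    # "m" is the cleared memory filesystem provided by the fixture above
    m.pipe("/hello.txt", b"hello world")
    assert m.cat("/hello.txt") == b"hello world"
    assert m.exists("/hello.txt")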
venv/lib/python3.12/site-packages/fsspec/core.py ADDED
@@ -0,0 +1,743 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import logging
5
+ import os
6
+ import re
7
+ from glob import has_magic
8
+ from pathlib import Path
9
+
10
+ # for backwards compat, we export cache things from here too
11
+ from fsspec.caching import ( # noqa: F401
12
+ BaseCache,
13
+ BlockCache,
14
+ BytesCache,
15
+ MMapCache,
16
+ ReadAheadCache,
17
+ caches,
18
+ )
19
+ from fsspec.compression import compr
20
+ from fsspec.config import conf
21
+ from fsspec.registry import filesystem, get_filesystem_class
22
+ from fsspec.utils import (
23
+ _unstrip_protocol,
24
+ build_name_function,
25
+ infer_compression,
26
+ stringify_path,
27
+ )
28
+
29
+ logger = logging.getLogger("fsspec")
30
+
31
+
32
+ class OpenFile:
33
+ """
34
+ File-like object to be used in a context
35
+
36
+ Can layer (buffered) text-mode and compression over any file-system, which
37
+ are typically binary-only.
38
+
39
+ These instances are safe to serialize, as the low-level file object
40
+ is not created until invoked using ``with``.
41
+
42
+ Parameters
43
+ ----------
44
+ fs: FileSystem
45
+ The file system to use for opening the file. Should be a subclass or duck-type
46
+ with ``fsspec.spec.AbstractFileSystem``
47
+ path: str
48
+ Location to open
49
+ mode: str like 'rb', optional
50
+ Mode of the opened file
51
+ compression: str or None, optional
52
+ Compression to apply
53
+ encoding: str or None, optional
54
+ The encoding to use if opened in text mode.
55
+ errors: str or None, optional
56
+ How to handle encoding errors if opened in text mode.
57
+ newline: None or str
58
+ Passed to TextIOWrapper in text mode, how to handle line endings.
59
+ autoopen: bool
60
+ If True, calls open() immediately. Mostly used by pickle
61
+ pos: int
62
+ If given and autoopen is True, seek to this location immediately
63
+ """
64
+
65
+ def __init__(
66
+ self,
67
+ fs,
68
+ path,
69
+ mode="rb",
70
+ compression=None,
71
+ encoding=None,
72
+ errors=None,
73
+ newline=None,
74
+ ):
75
+ self.fs = fs
76
+ self.path = path
77
+ self.mode = mode
78
+ self.compression = get_compression(path, compression)
79
+ self.encoding = encoding
80
+ self.errors = errors
81
+ self.newline = newline
82
+ self.fobjects = []
83
+
84
+ def __reduce__(self):
85
+ return (
86
+ OpenFile,
87
+ (
88
+ self.fs,
89
+ self.path,
90
+ self.mode,
91
+ self.compression,
92
+ self.encoding,
93
+ self.errors,
94
+ self.newline,
95
+ ),
96
+ )
97
+
98
+ def __repr__(self):
99
+ return f"<OpenFile '{self.path}'>"
100
+
101
+ def __enter__(self):
102
+ mode = self.mode.replace("t", "").replace("b", "") + "b"
103
+
104
+ try:
105
+ f = self.fs.open(self.path, mode=mode)
106
+ except FileNotFoundError as e:
107
+ if has_magic(self.path):
108
+ raise FileNotFoundError(
109
+ "%s not found. The URL contains glob characters: you maybe needed\n"
110
+ "to pass expand=True in fsspec.open() or the storage_options of \n"
111
+ "your library. You can also set the config value 'open_expand'\n"
112
+ "before import, or fsspec.core.DEFAULT_EXPAND at runtime, to True.",
113
+ self.path,
114
+ ) from e
115
+ raise
116
+
117
+ self.fobjects = [f]
118
+
119
+ if self.compression is not None:
120
+ compress = compr[self.compression]
121
+ f = compress(f, mode=mode[0])
122
+ self.fobjects.append(f)
123
+
124
+ if "b" not in self.mode:
125
+ # assume, for example, that 'r' is equivalent to 'rt' as in builtin
126
+ f = PickleableTextIOWrapper(
127
+ f, encoding=self.encoding, errors=self.errors, newline=self.newline
128
+ )
129
+ self.fobjects.append(f)
130
+
131
+ return self.fobjects[-1]
132
+
133
+ def __exit__(self, *args):
134
+ self.close()
135
+
136
+ @property
137
+ def full_name(self):
138
+ return _unstrip_protocol(self.path, self.fs)
139
+
140
+ def open(self):
141
+ """Materialise this as a real open file without context
142
+
143
+ The OpenFile object should be explicitly closed to avoid enclosed file
144
+ instances persisting. You must, therefore, keep a reference to the OpenFile
145
+ during the life of the file-like it generates.
146
+ """
147
+ return self.__enter__()
148
+
149
+ def close(self):
150
+ """Close all encapsulated file objects"""
151
+ for f in reversed(self.fobjects):
152
+ if "r" not in self.mode and not f.closed:
153
+ f.flush()
154
+ f.close()
155
+ self.fobjects.clear()
156
+
157
+
158
+ class OpenFiles(list):
159
+ """List of OpenFile instances
160
+
161
+ Can be used in a single context, which opens and closes all of the
162
+ contained files. Normal list access to get the elements works as
163
+ normal.
164
+
165
+ A special case is made for caching filesystems - the files will
166
+ be down/uploaded together at the start or end of the context, and
167
+ this may happen concurrently, if the target filesystem supports it.
168
+ """
169
+
170
+ def __init__(self, *args, mode="rb", fs=None):
171
+ self.mode = mode
172
+ self.fs = fs
173
+ self.files = []
174
+ super().__init__(*args)
175
+
176
+ def __enter__(self):
177
+ if self.fs is None:
178
+ raise ValueError("Context has already been used")
179
+
180
+ fs = self.fs
181
+ while True:
182
+ if hasattr(fs, "open_many"):
183
+ # check for concurrent cache download; or set up for upload
184
+ self.files = fs.open_many(self)
185
+ return self.files
186
+ if hasattr(fs, "fs") and fs.fs is not None:
187
+ fs = fs.fs
188
+ else:
189
+ break
190
+ return [s.__enter__() for s in self]
191
+
192
+ def __exit__(self, *args):
193
+ fs = self.fs
194
+ [s.__exit__(*args) for s in self]
195
+ if "r" not in self.mode:
196
+ while True:
197
+ if hasattr(fs, "open_many"):
198
+ # check for concurrent cache upload
199
+ fs.commit_many(self.files)
200
+ return
201
+ if hasattr(fs, "fs") and fs.fs is not None:
202
+ fs = fs.fs
203
+ else:
204
+ break
205
+
206
+ def __getitem__(self, item):
207
+ out = super().__getitem__(item)
208
+ if isinstance(item, slice):
209
+ return OpenFiles(out, mode=self.mode, fs=self.fs)
210
+ return out
211
+
212
+ def __repr__(self):
213
+ return f"<List of {len(self)} OpenFile instances>"
214
+
215
+
216
+ def open_files(
217
+ urlpath,
218
+ mode="rb",
219
+ compression=None,
220
+ encoding="utf8",
221
+ errors=None,
222
+ name_function=None,
223
+ num=1,
224
+ protocol=None,
225
+ newline=None,
226
+ auto_mkdir=True,
227
+ expand=True,
228
+ **kwargs,
229
+ ):
230
+ """Given a path or paths, return a list of ``OpenFile`` objects.
231
+
232
+ For writing, a str path must contain the "*" character, which will be filled
233
+ in by increasing numbers, e.g., "part*" -> "part1", "part2" if num=2.
234
+
235
+ For either reading or writing, can instead provide explicit list of paths.
236
+
237
+ Parameters
238
+ ----------
239
+ urlpath: string or list
240
+ Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
241
+ to read from alternative filesystems. To read from multiple files you
242
+ can pass a globstring or a list of paths, with the caveat that they
243
+ must all have the same protocol.
244
+ mode: 'rb', 'wt', etc.
245
+ compression: string or None
246
+ If given, open file using compression codec. Can either be a compression
247
+ name (a key in ``fsspec.compression.compr``) or "infer" to guess the
248
+ compression from the filename suffix.
249
+ encoding: str
250
+ For text mode only
251
+ errors: None or str
252
+ Passed to TextIOWrapper in text mode
253
+ name_function: function or None
254
+ if opening a set of files for writing, those files do not yet exist,
255
+ so we need to generate their names by formatting the urlpath for
256
+ each sequence number
257
+ num: int [1]
258
+ if writing mode, number of files we expect to create (passed to
259
+ name_function)
260
+ protocol: str or None
261
+ If given, overrides the protocol found in the URL.
262
+ newline: bytes or None
263
+ Used for line terminator in text mode. If None, uses system default;
264
+ if blank, uses no translation.
265
+ auto_mkdir: bool (True)
266
+ If in write mode, this will ensure the target directory exists before
267
+ writing, by calling ``fs.mkdirs(exist_ok=True)``.
268
+ expand: bool
269
+ **kwargs: dict
270
+ Extra options that make sense to a particular storage connection, e.g.
271
+ host, port, username, password, etc.
272
+
273
+ Examples
274
+ --------
275
+ >>> files = open_files('2015-*-*.csv') # doctest: +SKIP
276
+ >>> files = open_files(
277
+ ... 's3://bucket/2015-*-*.csv.gz', compression='gzip'
278
+ ... ) # doctest: +SKIP
279
+
280
+ Returns
281
+ -------
282
+ An ``OpenFiles`` instance, which is a list of ``OpenFile`` objects that can
283
+ be used as a single context
284
+
285
+ Notes
286
+ -----
287
+ For a full list of the available protocols and the implementations that
288
+ they map across to see the latest online documentation:
289
+
290
+ - For implementations built into ``fsspec`` see
291
+ https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
292
+ - For implementations in separate packages see
293
+ https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
294
+ """
295
+ fs, fs_token, paths = get_fs_token_paths(
296
+ urlpath,
297
+ mode,
298
+ num=num,
299
+ name_function=name_function,
300
+ storage_options=kwargs,
301
+ protocol=protocol,
302
+ expand=expand,
303
+ )
304
+ if fs.protocol == "file":
305
+ fs.auto_mkdir = auto_mkdir
306
+ elif "r" not in mode and auto_mkdir:
307
+ parents = {fs._parent(path) for path in paths}
308
+ for parent in parents:
309
+ try:
310
+ fs.makedirs(parent, exist_ok=True)
311
+ except PermissionError:
312
+ pass
313
+ return OpenFiles(
314
+ [
315
+ OpenFile(
316
+ fs,
317
+ path,
318
+ mode=mode,
319
+ compression=compression,
320
+ encoding=encoding,
321
+ errors=errors,
322
+ newline=newline,
323
+ )
324
+ for path in paths
325
+ ],
326
+ mode=mode,
327
+ fs=fs,
328
+ )
329
+
330
+
331
+ def _un_chain(path, kwargs):
332
+ # Avoid a circular import
333
+ from fsspec.implementations.cached import CachingFileSystem
334
+
335
+ if "::" in path:
336
+ x = re.compile(".*[^a-z]+.*") # test for non protocol-like single word
337
+ bits = []
338
+ for p in path.split("::"):
339
+ if "://" in p or x.match(p):
340
+ bits.append(p)
341
+ else:
342
+ bits.append(p + "://")
343
+ else:
344
+ bits = [path]
345
+ # [[url, protocol, kwargs], ...]
346
+ out = []
347
+ previous_bit = None
348
+ kwargs = kwargs.copy()
349
+ for bit in reversed(bits):
350
+ protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file"
351
+ cls = get_filesystem_class(protocol)
352
+ extra_kwargs = cls._get_kwargs_from_urls(bit)
353
+ kws = kwargs.pop(protocol, {})
354
+ if bit is bits[0]:
355
+ kws.update(kwargs)
356
+ kw = dict(
357
+ **{k: v for k, v in extra_kwargs.items() if k not in kws or v != kws[k]},
358
+ **kws,
359
+ )
360
+ bit = cls._strip_protocol(bit)
361
+ if "target_protocol" not in kw and issubclass(cls, CachingFileSystem):
362
+ bit = previous_bit
363
+ out.append((bit, protocol, kw))
364
+ previous_bit = bit
365
+ out.reverse()
366
+ return out
367
+
368
+
369
+ def url_to_fs(url, **kwargs):
370
+ """
371
+ Turn fully-qualified and potentially chained URL into filesystem instance
372
+
373
+ Parameters
374
+ ----------
375
+ url : str
376
+ The fsspec-compatible URL
377
+ **kwargs: dict
378
+ Extra options that make sense to a particular storage connection, e.g.
379
+ host, port, username, password, etc.
380
+
381
+ Returns
382
+ -------
383
+ filesystem : FileSystem
384
+ The new filesystem discovered from ``url`` and created with
385
+ ``**kwargs``.
386
+ urlpath : str
387
+ The file-systems-specific URL for ``url``.
388
+ """
389
+ url = stringify_path(url)
390
+ # non-FS arguments that appear in fsspec.open()
391
+ # inspect could keep this in sync with open()'s signature
392
+ known_kwargs = {
393
+ "compression",
394
+ "encoding",
395
+ "errors",
396
+ "expand",
397
+ "mode",
398
+ "name_function",
399
+ "newline",
400
+ "num",
401
+ }
402
+ kwargs = {k: v for k, v in kwargs.items() if k not in known_kwargs}
403
+ chain = _un_chain(url, kwargs)
404
+ inkwargs = {}
405
+ # Reverse iterate the chain, creating a nested target_* structure
406
+ for i, ch in enumerate(reversed(chain)):
407
+ urls, protocol, kw = ch
408
+ if i == len(chain) - 1:
409
+ inkwargs = dict(**kw, **inkwargs)
410
+ continue
411
+ inkwargs["target_options"] = dict(**kw, **inkwargs)
412
+ inkwargs["target_protocol"] = protocol
413
+ inkwargs["fo"] = urls
414
+ urlpath, protocol, _ = chain[0]
415
+ fs = filesystem(protocol, **inkwargs)
416
+ return fs, urlpath
417
+
418
+
419
+ DEFAULT_EXPAND = conf.get("open_expand", False)
420
+
421
+
422
+ def open(
423
+ urlpath,
424
+ mode="rb",
425
+ compression=None,
426
+ encoding="utf8",
427
+ errors=None,
428
+ protocol=None,
429
+ newline=None,
430
+ expand=None,
431
+ **kwargs,
432
+ ):
433
+ """Given a path or paths, return one ``OpenFile`` object.
434
+
435
+ Parameters
436
+ ----------
437
+ urlpath: string or list
438
+ Absolute or relative filepath. Prefix with a protocol like ``s3://``
439
+ to read from alternative filesystems. Should not include glob
440
+ character(s).
441
+ mode: 'rb', 'wt', etc.
442
+ compression: string or None
443
+ If given, open file using compression codec. Can either be a compression
444
+ name (a key in ``fsspec.compression.compr``) or "infer" to guess the
445
+ compression from the filename suffix.
446
+ encoding: str
447
+ For text mode only
448
+ errors: None or str
449
+ Passed to TextIOWrapper in text mode
450
+ protocol: str or None
451
+ If given, overrides the protocol found in the URL.
452
+ newline: bytes or None
453
+ Used for line terminator in text mode. If None, uses system default;
454
+ if blank, uses no translation.
455
+ expand: bool or None
456
+ Whether to regard file paths containing special glob characters as needing
457
+ expansion (finding the first match) or absolute. Setting False allows using
458
+ paths which do embed such characters. If None (default), this argument
459
+ takes its value from the DEFAULT_EXPAND module variable, which takes
460
+ its initial value from the "open_expand" config value at startup, which will
461
+ be False if not set.
462
+ **kwargs: dict
463
+ Extra options that make sense to a particular storage connection, e.g.
464
+ host, port, username, password, etc.
465
+
466
+ Examples
467
+ --------
468
+ >>> openfile = open('2015-01-01.csv') # doctest: +SKIP
469
+ >>> openfile = open(
470
+ ... 's3://bucket/2015-01-01.csv.gz', compression='gzip'
471
+ ... ) # doctest: +SKIP
472
+ >>> with openfile as f:
473
+ ... df = pd.read_csv(f) # doctest: +SKIP
474
+ ...
475
+
476
+ Returns
477
+ -------
478
+ ``OpenFile`` object.
479
+
480
+ Notes
481
+ -----
482
+ For a full list of the available protocols and the implementations that
483
+ they map across to see the latest online documentation:
484
+
485
+ - For implementations built into ``fsspec`` see
486
+ https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
487
+ - For implementations in separate packages see
488
+ https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
489
+ """
490
+ expand = DEFAULT_EXPAND if expand is None else expand
491
+ out = open_files(
492
+ urlpath=[urlpath],
493
+ mode=mode,
494
+ compression=compression,
495
+ encoding=encoding,
496
+ errors=errors,
497
+ protocol=protocol,
498
+ newline=newline,
499
+ expand=expand,
500
+ **kwargs,
501
+ )
502
+ if not out:
503
+ raise FileNotFoundError(urlpath)
504
+ return out[0]
505
+
506
+
507
+ def open_local(
508
+ url: str | list[str] | Path | list[Path],
509
+ mode: str = "rb",
510
+ **storage_options: dict,
511
+ ) -> str | list[str]:
512
+ """Open file(s) which can be resolved to local
513
+
514
+ For files which either are local, or get downloaded upon open
515
+ (e.g., by file caching)
516
+
517
+ Parameters
518
+ ----------
519
+ url: str or list(str)
520
+ mode: str
521
+ Must be read mode
522
+ storage_options:
523
+ passed on to FS for or used by open_files (e.g., compression)
524
+ """
525
+ if "r" not in mode:
526
+ raise ValueError("Can only ensure local files when reading")
527
+ of = open_files(url, mode=mode, **storage_options)
528
+ if not getattr(of[0].fs, "local_file", False):
529
+ raise ValueError(
530
+ "open_local can only be used on a filesystem which"
531
+ " has attribute local_file=True"
532
+ )
533
+ with of as files:
534
+ paths = [f.name for f in files]
535
+ if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
536
+ return paths[0]
537
+ return paths
538
+
539
+
540
+ def get_compression(urlpath, compression):
541
+ if compression == "infer":
542
+ compression = infer_compression(urlpath)
543
+ if compression is not None and compression not in compr:
544
+ raise ValueError(f"Compression type {compression} not supported")
545
+ return compression
546
+
547
+
548
+ def split_protocol(urlpath):
549
+ """Return protocol, path pair"""
550
+ urlpath = stringify_path(urlpath)
551
+ if "://" in urlpath:
552
+ protocol, path = urlpath.split("://", 1)
553
+ if len(protocol) > 1:
554
+ # excludes Windows paths
555
+ return protocol, path
556
+ if urlpath.startswith("data:"):
557
+ return urlpath.split(":", 1)
558
+ return None, urlpath
559
+
560
+
561
+ def strip_protocol(urlpath):
562
+ """Return only path part of full URL, according to appropriate backend"""
563
+ protocol, _ = split_protocol(urlpath)
564
+ cls = get_filesystem_class(protocol)
565
+ return cls._strip_protocol(urlpath)
566
+
567
+
568
+ def expand_paths_if_needed(paths, mode, num, fs, name_function):
569
+ """Expand paths if they have a ``*`` in them (write mode) or any of ``*?[]``
570
+ in them (read mode).
571
+
572
+ :param paths: list of paths
573
+ mode: str
574
+ Mode in which to open files.
575
+ num: int
576
+ If opening in writing mode, number of files we expect to create.
577
+ fs: filesystem object
578
+ name_function: callable
579
+ If opening in writing mode, this callable is used to generate path
580
+ names. Names are generated for each partition by
581
+ ``urlpath.replace('*', name_function(partition_index))``.
582
+ :return: list of paths
583
+ """
584
+ expanded_paths = []
585
+ paths = list(paths)
586
+
587
+ if "w" in mode: # write mode
588
+ if sum(1 for p in paths if "*" in p) > 1:
589
+ raise ValueError(
590
+ "When writing data, only one filename mask can be specified."
591
+ )
592
+ num = max(num, len(paths))
593
+
594
+ for curr_path in paths:
595
+ if "*" in curr_path:
596
+ # expand using name_function
597
+ expanded_paths.extend(_expand_paths(curr_path, name_function, num))
598
+ else:
599
+ expanded_paths.append(curr_path)
600
+ # if we generated more paths than asked for, trim the list
601
+ if len(expanded_paths) > num:
602
+ expanded_paths = expanded_paths[:num]
603
+
604
+ else: # read mode
605
+ for curr_path in paths:
606
+ if has_magic(curr_path):
607
+ # expand using glob
608
+ expanded_paths.extend(fs.glob(curr_path))
609
+ else:
610
+ expanded_paths.append(curr_path)
611
+
612
+ return expanded_paths
613
+
614
+
615
+ def get_fs_token_paths(
616
+ urlpath,
617
+ mode="rb",
618
+ num=1,
619
+ name_function=None,
620
+ storage_options=None,
621
+ protocol=None,
622
+ expand=True,
623
+ ):
624
+ """Filesystem, deterministic token, and paths from a urlpath and options.
625
+
626
+ Parameters
627
+ ----------
628
+ urlpath: string or iterable
629
+ Absolute or relative filepath, URL (may include protocols like
630
+ ``s3://``), or globstring pointing to data.
631
+ mode: str, optional
632
+ Mode in which to open files.
633
+ num: int, optional
634
+ If opening in writing mode, number of files we expect to create.
635
+ name_function: callable, optional
636
+ If opening in writing mode, this callable is used to generate path
637
+ names. Names are generated for each partition by
638
+ ``urlpath.replace('*', name_function(partition_index))``.
639
+ storage_options: dict, optional
640
+ Additional keywords to pass to the filesystem class.
641
+ protocol: str or None
642
+ To override the protocol specifier in the URL
643
+ expand: bool
644
+ Expand string paths for writing, assuming the path is a directory
645
+ """
646
+ if isinstance(urlpath, (list, tuple, set)):
647
+ if not urlpath:
648
+ raise ValueError("empty urlpath sequence")
649
+ urlpath0 = stringify_path(next(iter(urlpath)))
650
+ else:
651
+ urlpath0 = stringify_path(urlpath)
652
+ storage_options = storage_options or {}
653
+ if protocol:
654
+ storage_options["protocol"] = protocol
655
+ chain = _un_chain(urlpath0, storage_options or {})
656
+ inkwargs = {}
657
+ # Reverse iterate the chain, creating a nested target_* structure
658
+ for i, ch in enumerate(reversed(chain)):
659
+ urls, nested_protocol, kw = ch
660
+ if i == len(chain) - 1:
661
+ inkwargs = dict(**kw, **inkwargs)
662
+ continue
663
+ inkwargs["target_options"] = dict(**kw, **inkwargs)
664
+ inkwargs["target_protocol"] = nested_protocol
665
+ inkwargs["fo"] = urls
666
+ paths, protocol, _ = chain[0]
667
+ fs = filesystem(protocol, **inkwargs)
668
+ if isinstance(urlpath, (list, tuple, set)):
669
+ pchains = [
670
+ _un_chain(stringify_path(u), storage_options or {})[0] for u in urlpath
671
+ ]
672
+ if len({pc[1] for pc in pchains}) > 1:
673
+ raise ValueError("Protocol mismatch getting fs from %s", urlpath)
674
+ paths = [pc[0] for pc in pchains]
675
+ else:
676
+ paths = fs._strip_protocol(paths)
677
+ if isinstance(paths, (list, tuple, set)):
678
+ if expand:
679
+ paths = expand_paths_if_needed(paths, mode, num, fs, name_function)
680
+ elif not isinstance(paths, list):
681
+ paths = list(paths)
682
+ else:
683
+ if ("w" in mode or "x" in mode) and expand:
684
+ paths = _expand_paths(paths, name_function, num)
685
+ elif "*" in paths:
686
+ paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
687
+ else:
688
+ paths = [paths]
689
+
690
+ return fs, fs._fs_token, paths
691
+
692
+
693
+ def _expand_paths(path, name_function, num):
694
+ if isinstance(path, str):
695
+ if path.count("*") > 1:
696
+ raise ValueError("Output path spec must contain exactly one '*'.")
697
+ elif "*" not in path:
698
+ path = os.path.join(path, "*.part")
699
+
700
+ if name_function is None:
701
+ name_function = build_name_function(num - 1)
702
+
703
+ paths = [path.replace("*", name_function(i)) for i in range(num)]
704
+ if paths != sorted(paths):
705
+ logger.warning(
706
+ "In order to preserve order between partitions"
707
+ " paths created with ``name_function`` should "
708
+ "sort to partition order"
709
+ )
710
+ elif isinstance(path, (tuple, list)):
711
+ assert len(path) == num
712
+ paths = list(path)
713
+ else:
714
+ raise ValueError(
715
+ "Path should be either\n"
716
+ "1. A list of paths: ['foo.json', 'bar.json', ...]\n"
717
+ "2. A directory: 'foo/\n"
718
+ "3. A path with a '*' in it: 'foo.*.json'"
719
+ )
720
+ return paths
721
+
722
+
723
+ class PickleableTextIOWrapper(io.TextIOWrapper):
724
+ """TextIOWrapper cannot be pickled. This solves it.
725
+
726
+ Requires that ``buffer`` be pickleable, which all instances of
727
+ AbstractBufferedFile are.
728
+ """
729
+
730
+ def __init__(
731
+ self,
732
+ buffer,
733
+ encoding=None,
734
+ errors=None,
735
+ newline=None,
736
+ line_buffering=False,
737
+ write_through=False,
738
+ ):
739
+ self.args = buffer, encoding, errors, newline, line_buffering, write_through
740
+ super().__init__(*self.args)
741
+
742
+ def __reduce__(self):
743
+ return PickleableTextIOWrapper, self.args
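
``core.py`` is the user-facing entry point: ``open``/``open_files`` build lazy, serializable ``OpenFile`` objects (optionally layering compression and text decoding over a binary backend), and ``url_to_fs`` resolves a possibly chained URL into a filesystem instance plus a backend-native path. A short sketch against the built-in memory backend; the paths are illustrative:

import fsspec
from fsspec.core import url_to_fs

# OpenFile is lazy: the real file object only exists inside the context
with fsspec.open("memory://demo.csv", mode="wt", encoding="utf8") as f:
    f.write("a,b\n1,2\n")

# open_files expands a glob into OpenFile instances usable as a single context
files = fsspec.open_files("memory://*.csv", mode="rt")
with files as handles:
    print([h.read() for h in handles])

# url_to_fs returns the filesystem plus the protocol-stripped path
fs, path = url_to_fs("memory://demo.csv")
print(fs.protocol, path)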
venv/lib/python3.12/site-packages/fsspec/dircache.py ADDED
@@ -0,0 +1,98 @@
+import time
+from collections.abc import MutableMapping
+from functools import lru_cache
+
+
+class DirCache(MutableMapping):
+    """
+    Caching of directory listings, in a structure like::
+
+        {"path0": [
+            {"name": "path0/file0",
+             "size": 123,
+             "type": "file",
+             ...
+            },
+            {"name": "path0/file1",
+            },
+            ...
+            ],
+         "path1": [...]
+        }
+
+    Parameters to this class control listing expiry or indeed turn
+    caching off
+    """
+
+    def __init__(
+        self,
+        use_listings_cache=True,
+        listings_expiry_time=None,
+        max_paths=None,
+        **kwargs,
+    ):
+        """
+
+        Parameters
+        ----------
+        use_listings_cache: bool
+            If False, this cache never returns items, but always reports KeyError,
+            and setting items has no effect
+        listings_expiry_time: int or float (optional)
+            Time in seconds that a listing is considered valid. If None,
+            listings do not expire.
+        max_paths: int (optional)
+            The number of most recent listings that are considered valid; 'recent'
+            refers to when the entry was set.
+        """
+        self._cache = {}
+        self._times = {}
+        if max_paths:
+            self._q = lru_cache(max_paths + 1)(lambda key: self._cache.pop(key, None))
+        self.use_listings_cache = use_listings_cache
+        self.listings_expiry_time = listings_expiry_time
+        self.max_paths = max_paths
+
+    def __getitem__(self, item):
+        if self.listings_expiry_time is not None:
+            if self._times.get(item, 0) - time.time() < -self.listings_expiry_time:
+                del self._cache[item]
+        if self.max_paths:
+            self._q(item)
+        return self._cache[item]  # maybe raises KeyError
+
+    def clear(self):
+        self._cache.clear()
+
+    def __len__(self):
+        return len(self._cache)
+
+    def __contains__(self, item):
+        try:
+            self[item]
+            return True
+        except KeyError:
+            return False
+
+    def __setitem__(self, key, value):
+        if not self.use_listings_cache:
+            return
+        if self.max_paths:
+            self._q(key)
+        self._cache[key] = value
+        if self.listings_expiry_time is not None:
+            self._times[key] = time.time()
+
+    def __delitem__(self, key):
+        del self._cache[key]
+
+    def __iter__(self):
+        entries = list(self._cache)
+
+        return (k for k in entries if k in self)
+
+    def __reduce__(self):
+        return (
+            DirCache,
+            (self.use_listings_cache, self.listings_expiry_time, self.max_paths),
+        )
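
``DirCache`` keeps ``ls``-style listings keyed by path, drops entries once ``listings_expiry_time`` seconds have passed, and (via the ``lru_cache`` trick above) bounds the number of retained listings to ``max_paths``. A small sketch of the expiry behaviour with a made-up listing entry:

import time
from fsspec.dircache import DirCache

cache = DirCache(listings_expiry_time=2)
cache["/bucket/data"] = [{"name": "/bucket/data/file0", "size": 123, "type": "file"}]

print("/bucket/data" in cache)   # True while the entry is fresh
time.sleep(2.1)
print("/bucket/data" in cache)   # False: looking up the expired entry raises KeyError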
venv/lib/python3.12/site-packages/fsspec/exceptions.py ADDED
@@ -0,0 +1,18 @@
+"""
+fsspec user-defined exception classes
+"""
+
+import asyncio
+
+
+class BlocksizeMismatchError(ValueError):
+    """
+    Raised when a cached file is opened with a different blocksize than it was
+    written with
+    """
+
+
+class FSTimeoutError(asyncio.TimeoutError):
+    """
+    Raised when an fsspec function call times out
+    """
venv/lib/python3.12/site-packages/fsspec/fuse.py ADDED
@@ -0,0 +1,324 @@
1
+ import argparse
2
+ import logging
3
+ import os
4
+ import stat
5
+ import threading
6
+ import time
7
+ from errno import EIO, ENOENT
8
+
9
+ from fuse import FUSE, FuseOSError, LoggingMixIn, Operations
10
+
11
+ from fsspec import __version__
12
+ from fsspec.core import url_to_fs
13
+
14
+ logger = logging.getLogger("fsspec.fuse")
15
+
16
+
17
+ class FUSEr(Operations):
18
+ def __init__(self, fs, path, ready_file=False):
19
+ self.fs = fs
20
+ self.cache = {}
21
+ self.root = path.rstrip("/") + "/"
22
+ self.counter = 0
23
+ logger.info("Starting FUSE at %s", path)
24
+ self._ready_file = ready_file
25
+
26
+ def getattr(self, path, fh=None):
27
+ logger.debug("getattr %s", path)
28
+ if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]:
29
+ return {"type": "file", "st_size": 5}
30
+
31
+ path = "".join([self.root, path.lstrip("/")]).rstrip("/")
32
+ try:
33
+ info = self.fs.info(path)
34
+ except FileNotFoundError as exc:
35
+ raise FuseOSError(ENOENT) from exc
36
+
37
+ data = {"st_uid": info.get("uid", 1000), "st_gid": info.get("gid", 1000)}
38
+ perm = info.get("mode", 0o777)
39
+
40
+ if info["type"] != "file":
41
+ data["st_mode"] = stat.S_IFDIR | perm
42
+ data["st_size"] = 0
43
+ data["st_blksize"] = 0
44
+ else:
45
+ data["st_mode"] = stat.S_IFREG | perm
46
+ data["st_size"] = info["size"]
47
+ data["st_blksize"] = 5 * 2**20
48
+ data["st_nlink"] = 1
49
+ data["st_atime"] = info["atime"] if "atime" in info else time.time()
50
+ data["st_ctime"] = info["ctime"] if "ctime" in info else time.time()
51
+ data["st_mtime"] = info["mtime"] if "mtime" in info else time.time()
52
+ return data
53
+
54
+ def readdir(self, path, fh):
55
+ logger.debug("readdir %s", path)
56
+ path = "".join([self.root, path.lstrip("/")])
57
+ files = self.fs.ls(path, False)
58
+ files = [os.path.basename(f.rstrip("/")) for f in files]
59
+ return [".", ".."] + files
60
+
61
+ def mkdir(self, path, mode):
62
+ path = "".join([self.root, path.lstrip("/")])
63
+ self.fs.mkdir(path)
64
+ return 0
65
+
66
+ def rmdir(self, path):
67
+ path = "".join([self.root, path.lstrip("/")])
68
+ self.fs.rmdir(path)
69
+ return 0
70
+
71
+ def read(self, path, size, offset, fh):
72
+ logger.debug("read %s", (path, size, offset))
73
+ if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]:
74
+ # status indicator
75
+ return b"ready"
76
+
77
+ f = self.cache[fh]
78
+ f.seek(offset)
79
+ out = f.read(size)
80
+ return out
81
+
82
+ def write(self, path, data, offset, fh):
83
+ logger.debug("write %s", (path, offset))
84
+ f = self.cache[fh]
85
+ f.seek(offset)
86
+ f.write(data)
87
+ return len(data)
88
+
89
+ def create(self, path, flags, fi=None):
90
+ logger.debug("create %s", (path, flags))
91
+ fn = "".join([self.root, path.lstrip("/")])
92
+ self.fs.touch(fn) # OS will want to get attributes immediately
93
+ f = self.fs.open(fn, "wb")
94
+ self.cache[self.counter] = f
95
+ self.counter += 1
96
+ return self.counter - 1
97
+
98
+ def open(self, path, flags):
99
+ logger.debug("open %s", (path, flags))
100
+ fn = "".join([self.root, path.lstrip("/")])
101
+ if flags % 2 == 0:
102
+ # read
103
+ mode = "rb"
104
+ else:
105
+ # write/create
106
+ mode = "wb"
107
+ self.cache[self.counter] = self.fs.open(fn, mode)
108
+ self.counter += 1
109
+ return self.counter - 1
110
+
111
+ def truncate(self, path, length, fh=None):
112
+ fn = "".join([self.root, path.lstrip("/")])
113
+ if length != 0:
114
+ raise NotImplementedError
115
+ # maybe should be no-op since open with write sets size to zero anyway
116
+ self.fs.touch(fn)
117
+
118
+ def unlink(self, path):
119
+ fn = "".join([self.root, path.lstrip("/")])
120
+ try:
121
+ self.fs.rm(fn, False)
122
+ except (OSError, FileNotFoundError) as exc:
123
+ raise FuseOSError(EIO) from exc
124
+
125
+ def release(self, path, fh):
126
+ try:
127
+ if fh in self.cache:
128
+ f = self.cache[fh]
129
+ f.close()
130
+ self.cache.pop(fh)
131
+ except Exception as e:
132
+ print(e)
133
+ return 0
134
+
135
+ def chmod(self, path, mode):
136
+ if hasattr(self.fs, "chmod"):
137
+ path = "".join([self.root, path.lstrip("/")])
138
+ return self.fs.chmod(path, mode)
139
+ raise NotImplementedError
140
+
141
+
142
+ def run(
143
+ fs,
144
+ path,
145
+ mount_point,
146
+ foreground=True,
147
+ threads=False,
148
+ ready_file=False,
149
+ ops_class=FUSEr,
150
+ ):
151
+ """Mount stuff in a local directory
152
+
153
+ This uses fusepy to make it appear as if a given path on an fsspec
154
+ instance is in fact resident within the local file-system.
155
+
156
+ This requires that fusepy by installed, and that FUSE be available on
157
+ the system (typically requiring a package to be installed with
158
+ apt, yum, brew, etc.).
159
+
160
+ Parameters
161
+ ----------
162
+ fs: file-system instance
163
+ From one of the compatible implementations
164
+ path: str
165
+ Location on that file-system to regard as the root directory to
166
+ mount. Note that you typically should include the terminating "/"
167
+ character.
168
+ mount_point: str
169
+ An empty directory on the local file-system where the contents of
170
+ the remote path will appear.
171
+ foreground: bool
172
+ Whether or not calling this function will block. Operation will
173
+ typically be more stable if True.
174
+ threads: bool
175
+ Whether or not to create threads when responding to file operations
176
+ within the mounter directory. Operation will typically be more
177
+ stable if False.
178
+ ready_file: bool
179
+ Whether the FUSE process is ready. The ``.fuse_ready`` file will
180
+ exist in the ``mount_point`` directory if True. Debugging purpose.
181
+ ops_class: FUSEr or Subclass of FUSEr
182
+ To override the default behavior of FUSEr. For Example, logging
183
+ to file.
184
+
185
+ """
186
+ func = lambda: FUSE(
187
+ ops_class(fs, path, ready_file=ready_file),
188
+ mount_point,
189
+ nothreads=not threads,
190
+ foreground=foreground,
191
+ )
192
+ if not foreground:
193
+ th = threading.Thread(target=func)
194
+ th.daemon = True
195
+ th.start()
196
+ return th
197
+ else: # pragma: no cover
198
+ try:
199
+ func()
200
+ except KeyboardInterrupt:
201
+ pass
202
+
203
+
204
+ def main(args):
205
+ """Mount filesystem from chained URL to MOUNT_POINT.
206
+
207
+ Examples:
208
+
209
+ python3 -m fsspec.fuse memory /usr/share /tmp/mem
210
+
211
+ python3 -m fsspec.fuse local /tmp/source /tmp/local \\
212
+ -l /tmp/fsspecfuse.log
213
+
214
+ You can also mount chained-URLs and use special settings:
215
+
216
+ python3 -m fsspec.fuse 'filecache::zip::file://data.zip' \\
217
+ / /tmp/zip \\
218
+ -o 'filecache-cache_storage=/tmp/simplecache'
219
+
220
+ You can specify the type of the setting by using `[int]` or `[bool]`,
221
+ (`true`, `yes`, `1` represents the Boolean value `True`):
222
+
223
+ python3 -m fsspec.fuse 'simplecache::ftp://ftp1.at.proftpd.org' \\
224
+ /historic/packages/RPMS /tmp/ftp \\
225
+ -o 'simplecache-cache_storage=/tmp/simplecache' \\
226
+ -o 'simplecache-check_files=false[bool]' \\
227
+ -o 'ftp-listings_expiry_time=60[int]' \\
228
+ -o 'ftp-username=anonymous' \\
229
+ -o 'ftp-password=xieyanbo'
230
+ """
231
+
232
+ class RawDescriptionArgumentParser(argparse.ArgumentParser):
233
+ def format_help(self):
234
+ usage = super().format_help()
235
+ parts = usage.split("\n\n")
236
+ parts[1] = self.description.rstrip()
237
+ return "\n\n".join(parts)
238
+
239
+ parser = RawDescriptionArgumentParser(prog="fsspec.fuse", description=main.__doc__)
240
+ parser.add_argument("--version", action="version", version=__version__)
241
+ parser.add_argument("url", type=str, help="fs url")
242
+ parser.add_argument("source_path", type=str, help="source directory in fs")
243
+ parser.add_argument("mount_point", type=str, help="local directory")
244
+ parser.add_argument(
245
+ "-o",
246
+ "--option",
247
+ action="append",
248
+ help="Any options of protocol included in the chained URL",
249
+ )
250
+ parser.add_argument(
251
+ "-l", "--log-file", type=str, help="Logging FUSE debug info (Default: '')"
252
+ )
253
+ parser.add_argument(
254
+ "-f",
255
+ "--foreground",
256
+ action="store_false",
257
+ help="Running in foreground or not (Default: False)",
258
+ )
259
+ parser.add_argument(
260
+ "-t",
261
+ "--threads",
262
+ action="store_false",
263
+ help="Running with threads support (Default: False)",
264
+ )
265
+ parser.add_argument(
266
+ "-r",
267
+ "--ready-file",
268
+ action="store_false",
269
+ help="The `.fuse_ready` file will exist after FUSE is ready. "
270
+ "(Debugging purpose, Default: False)",
271
+ )
272
+ args = parser.parse_args(args)
273
+
274
+ kwargs = {}
275
+ for item in args.option or []:
276
+ key, sep, value = item.partition("=")
277
+ if not sep:
278
+ parser.error(message=f"Wrong option: {item!r}")
279
+ val = value.lower()
280
+ if val.endswith("[int]"):
281
+ value = int(value[: -len("[int]")])
282
+ elif val.endswith("[bool]"):
283
+ value = val[: -len("[bool]")] in ["1", "yes", "true"]
284
+
285
+ if "-" in key:
286
+ fs_name, setting_name = key.split("-", 1)
287
+ if fs_name in kwargs:
288
+ kwargs[fs_name][setting_name] = value
289
+ else:
290
+ kwargs[fs_name] = {setting_name: value}
291
+ else:
292
+ kwargs[key] = value
293
+
294
+ if args.log_file:
295
+ logging.basicConfig(
296
+ level=logging.DEBUG,
297
+ filename=args.log_file,
298
+ format="%(asctime)s %(message)s",
299
+ )
300
+
301
+ class LoggingFUSEr(FUSEr, LoggingMixIn):
302
+ pass
303
+
304
+ fuser = LoggingFUSEr
305
+ else:
306
+ fuser = FUSEr
307
+
308
+ fs, url_path = url_to_fs(args.url, **kwargs)
309
+ logger.debug("Mounting %s to %s", url_path, str(args.mount_point))
310
+ run(
311
+ fs,
312
+ args.source_path,
313
+ args.mount_point,
314
+ foreground=args.foreground,
315
+ threads=args.threads,
316
+ ready_file=args.ready_file,
317
+ ops_class=fuser,
318
+ )
319
+
320
+
321
+ if __name__ == "__main__":
322
+ import sys
323
+
324
+ main(sys.argv[1:])
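
``fsspec.fuse`` exposes a path on any fsspec filesystem as a local directory, either through ``run()`` or the ``python3 -m fsspec.fuse`` CLI documented in ``main()``. A rough sketch of the programmatic form, assuming fusepy and a working FUSE install; the paths are illustrative and the mount point must be an existing empty directory:

import fsspec
from fsspec.fuse import run

fs = fsspec.filesystem("memory")
fs.makedirs("/export", exist_ok=True)
fs.pipe("/export/hello.txt", b"hello from fsspec")

# Blocks until interrupted; foreground=False would mount in a daemon thread instead
run(fs, "/export/", "/tmp/fsspec-mount", foreground=True)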
venv/lib/python3.12/site-packages/fsspec/generic.py ADDED
@@ -0,0 +1,395 @@
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ import logging
5
+ import os
6
+ import shutil
7
+ import uuid
8
+ from typing import Optional
9
+
10
+ from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper
11
+ from .callbacks import DEFAULT_CALLBACK
12
+ from .core import filesystem, get_filesystem_class, split_protocol, url_to_fs
13
+
14
+ _generic_fs = {}
15
+ logger = logging.getLogger("fsspec.generic")
16
+
17
+
18
+ def set_generic_fs(protocol, **storage_options):
19
+ """Populate the dict used for method=="generic" lookups"""
20
+ _generic_fs[protocol] = filesystem(protocol, **storage_options)
21
+
22
+
23
+ def _resolve_fs(url, method, protocol=None, storage_options=None):
24
+ """Pick instance of backend FS"""
25
+ url = url[0] if isinstance(url, (list, tuple)) else url
26
+ protocol = protocol or split_protocol(url)[0]
27
+ storage_options = storage_options or {}
28
+ if method == "default":
29
+ return filesystem(protocol)
30
+ if method == "generic":
31
+ return _generic_fs[protocol]
32
+ if method == "current":
33
+ cls = get_filesystem_class(protocol)
34
+ return cls.current()
35
+ if method == "options":
36
+ fs, _ = url_to_fs(url, **storage_options.get(protocol, {}))
37
+ return fs
38
+ raise ValueError(f"Unknown FS resolution method: {method}")
39
+
40
+
41
+ def rsync(
42
+ source,
43
+ destination,
44
+ delete_missing=False,
45
+ source_field="size",
46
+ dest_field="size",
47
+ update_cond="different",
48
+ inst_kwargs=None,
49
+ fs=None,
50
+ **kwargs,
51
+ ):
52
+ """Sync files between two directory trees
53
+
54
+ (experimental)
55
+
56
+ Parameters
57
+ ----------
58
+ source: str
59
+ Root of the directory tree to take files from. This must be a directory, but
60
+ do not include any terminating "/" character
61
+ destination: str
62
+ Root path to copy into. The contents of this location should be
63
+ identical to the contents of ``source`` when done. This will be made a
64
+ directory, and the terminal "/" should not be included.
65
+ delete_missing: bool
66
+ If there are paths in the destination that don't exist in the
67
+ source and this is True, delete them. Otherwise, leave them alone.
68
+ source_field: str | callable
69
+ If ``update_field`` is "different", this is the key in the info
70
+ of source files to consider for difference. Maybe a function of the
71
+ info dict.
72
+ dest_field: str | callable
73
+ If ``update_field`` is "different", this is the key in the info
74
+ of destination files to consider for difference. May be a function of
75
+ the info dict.
76
+ update_cond: "different"|"always"|"never"
77
+ If "always", every file is copied, regardless of whether it exists in
78
+ the destination. If "never", files that exist in the destination are
79
+ not copied again. If "different" (default), only copy if the info
80
+ fields given by ``source_field`` and ``dest_field`` (usually "size")
81
+ are different. Other comparisons may be added in the future.
82
+ inst_kwargs: dict|None
83
+ If ``fs`` is None, use this set of keyword arguments to make a
84
+ GenericFileSystem instance
85
+ fs: GenericFileSystem|None
86
+ Instance to use if explicitly given. The instance defines how to
87
+ to make downstream file system instances from paths.
88
+
89
+ Returns
90
+ -------
91
+ dict of the copy operations that were performed, {source: destination}
92
+ """
93
+ fs = fs or GenericFileSystem(**(inst_kwargs or {}))
94
+ source = fs._strip_protocol(source)
95
+ destination = fs._strip_protocol(destination)
96
+ allfiles = fs.find(source, withdirs=True, detail=True)
97
+ if not fs.isdir(source):
98
+ raise ValueError("Can only rsync on a directory")
99
+ otherfiles = fs.find(destination, withdirs=True, detail=True)
100
+ dirs = [
101
+ a
102
+ for a, v in allfiles.items()
103
+ if v["type"] == "directory" and a.replace(source, destination) not in otherfiles
104
+ ]
105
+ logger.debug(f"{len(dirs)} directories to create")
106
+ if dirs:
107
+ fs.make_many_dirs(
108
+ [dirn.replace(source, destination) for dirn in dirs], exist_ok=True
109
+ )
110
+ allfiles = {a: v for a, v in allfiles.items() if v["type"] == "file"}
111
+ logger.debug(f"{len(allfiles)} files to consider for copy")
112
+ to_delete = [
113
+ o
114
+ for o, v in otherfiles.items()
115
+ if o.replace(destination, source) not in allfiles and v["type"] == "file"
116
+ ]
117
+ for k, v in allfiles.copy().items():
118
+ otherfile = k.replace(source, destination)
119
+ if otherfile in otherfiles:
120
+ if update_cond == "always":
121
+ allfiles[k] = otherfile
122
+ elif update_cond == "different":
123
+ inf1 = source_field(v) if callable(source_field) else v[source_field]
124
+ v2 = otherfiles[otherfile]
125
+ inf2 = dest_field(v2) if callable(dest_field) else v2[dest_field]
126
+ if inf1 != inf2:
127
+ # details mismatch, make copy
128
+ allfiles[k] = otherfile
129
+ else:
130
+ # details match, don't copy
131
+ allfiles.pop(k)
132
+ else:
133
+ # file not in target yet
134
+ allfiles[k] = otherfile
135
+ logger.debug(f"{len(allfiles)} files to copy")
136
+ if allfiles:
137
+ source_files, target_files = zip(*allfiles.items())
138
+ fs.cp(source_files, target_files, **kwargs)
139
+ logger.debug(f"{len(to_delete)} files to delete")
140
+ if delete_missing and to_delete:
141
+ fs.rm(to_delete)
142
+ return allfiles
143
+
144
+
145
+ class GenericFileSystem(AsyncFileSystem):
146
+ """Wrapper over all other FS types
147
+
148
+ <experimental!>
149
+
150
+ This implementation is a single unified interface to be able to run FS operations
151
+ over generic URLs, and dispatch to the specific implementations using the URL
152
+ protocol prefix.
153
+
154
+ Note: instances of this FS are always async, even if you never use it with any async
155
+ backend.
156
+ """
157
+
158
+ protocol = "generic" # there is no real reason to ever use a protocol with this FS
159
+
160
+ def __init__(self, default_method="default", storage_options=None, **kwargs):
161
+ """
162
+
163
+ Parameters
164
+ ----------
165
+ default_method: str (optional)
166
+ Defines how to configure backend FS instances. Options are:
167
+ - "default": instantiate like FSClass(), with no
168
+ extra arguments; this is the default instance of that FS, and can be
169
+ configured via the config system
170
+ - "generic": takes instances from the `_generic_fs` dict in this module,
171
+ which you must populate before use. Keys are by protocol
172
+ - "options": expects storage_options, a dict mapping protocol to
173
+ kwargs to use when constructing the filesystem
174
+ - "current": takes the most recently instantiated version of each FS
175
+ """
176
+ self.method = default_method
177
+ self.st_opts = storage_options
178
+ super().__init__(**kwargs)
179
+
180
+ def _parent(self, path):
181
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
182
+ return fs.unstrip_protocol(fs._parent(path))
183
+
184
+ def _strip_protocol(self, path):
185
+ # normalization only
186
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
187
+ return fs.unstrip_protocol(fs._strip_protocol(path))
188
+
189
+ async def _find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
190
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
191
+ if fs.async_impl:
192
+ out = await fs._find(
193
+ path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
194
+ )
195
+ else:
196
+ out = fs.find(
197
+ path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
198
+ )
199
+ result = {}
200
+ for k, v in out.items():
201
+ v = v.copy() # don't corrupt target FS dircache
202
+ name = fs.unstrip_protocol(k)
203
+ v["name"] = name
204
+ result[name] = v
205
+ if detail:
206
+ return result
207
+ return list(result)
208
+
209
+ async def _info(self, url, **kwargs):
210
+ fs = _resolve_fs(url, self.method)
211
+ if fs.async_impl:
212
+ out = await fs._info(url, **kwargs)
213
+ else:
214
+ out = fs.info(url, **kwargs)
215
+ out = out.copy() # don't edit originals
216
+ out["name"] = fs.unstrip_protocol(out["name"])
217
+ return out
218
+
219
+ async def _ls(
220
+ self,
221
+ url,
222
+ detail=True,
223
+ **kwargs,
224
+ ):
225
+ fs = _resolve_fs(url, self.method)
226
+ if fs.async_impl:
227
+ out = await fs._ls(url, detail=True, **kwargs)
228
+ else:
229
+ out = fs.ls(url, detail=True, **kwargs)
230
+ out = [o.copy() for o in out] # don't edit originals
231
+ for o in out:
232
+ o["name"] = fs.unstrip_protocol(o["name"])
233
+ if detail:
234
+ return out
235
+ else:
236
+ return [o["name"] for o in out]
237
+
238
+ async def _cat_file(
239
+ self,
240
+ url,
241
+ **kwargs,
242
+ ):
243
+ fs = _resolve_fs(url, self.method)
244
+ if fs.async_impl:
245
+ return await fs._cat_file(url, **kwargs)
246
+ else:
247
+ return fs.cat_file(url, **kwargs)
248
+
249
+ async def _pipe_file(
250
+ self,
251
+ path,
252
+ value,
253
+ **kwargs,
254
+ ):
255
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
256
+ if fs.async_impl:
257
+ return await fs._pipe_file(path, value, **kwargs)
258
+ else:
259
+ return fs.pipe_file(path, value, **kwargs)
260
+
261
+ async def _rm(self, url, **kwargs):
262
+ urls = url
263
+ if isinstance(urls, str):
264
+ urls = [urls]
265
+ fs = _resolve_fs(urls[0], self.method)
266
+ if fs.async_impl:
267
+ await fs._rm(urls, **kwargs)
268
+ else:
269
+ fs.rm(url, **kwargs)
270
+
271
+ async def _makedirs(self, path, exist_ok=False):
272
+ logger.debug("Make dir %s", path)
273
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
274
+ if fs.async_impl:
275
+ await fs._makedirs(path, exist_ok=exist_ok)
276
+ else:
277
+ fs.makedirs(path, exist_ok=exist_ok)
278
+
279
+ def rsync(self, source, destination, **kwargs):
280
+ """Sync files between two directory trees
281
+
282
+ See `func:rsync` for more details.
283
+ """
284
+ rsync(source, destination, fs=self, **kwargs)
285
+
286
+ async def _cp_file(
287
+ self,
288
+ url,
289
+ url2,
290
+ blocksize=2**20,
291
+ callback=DEFAULT_CALLBACK,
292
+ tempdir: Optional[str] = None,
293
+ **kwargs,
294
+ ):
295
+ fs = _resolve_fs(url, self.method)
296
+ fs2 = _resolve_fs(url2, self.method)
297
+ if fs is fs2:
298
+ # pure remote
299
+ if fs.async_impl:
300
+ return await fs._copy(url, url2, **kwargs)
301
+ else:
302
+ return fs.copy(url, url2, **kwargs)
303
+ await copy_file_op(fs, [url], fs2, [url2], tempdir, 1, on_error="raise")
304
+
305
+ async def _make_many_dirs(self, urls, exist_ok=True):
306
+ fs = _resolve_fs(urls[0], self.method)
307
+ if fs.async_impl:
308
+ coros = [fs._makedirs(u, exist_ok=exist_ok) for u in urls]
309
+ await _run_coros_in_chunks(coros)
310
+ else:
311
+ for u in urls:
312
+ fs.makedirs(u, exist_ok=exist_ok)
313
+
314
+ make_many_dirs = sync_wrapper(_make_many_dirs)
315
+
316
+ async def _copy(
317
+ self,
318
+ path1: list[str],
319
+ path2: list[str],
320
+ recursive: bool = False,
321
+ on_error: str = "ignore",
322
+ maxdepth: Optional[int] = None,
323
+ batch_size: Optional[int] = None,
324
+ tempdir: Optional[str] = None,
325
+ **kwargs,
326
+ ):
327
+ # TODO: special case for one FS being local, which can use get/put
328
+ # TODO: special case for one being memFS, which can use cat/pipe
329
+ if recursive:
330
+ raise NotImplementedError("Please use fsspec.generic.rsync")
331
+ path1 = [path1] if isinstance(path1, str) else path1
332
+ path2 = [path2] if isinstance(path2, str) else path2
333
+
334
+ fs = _resolve_fs(path1, self.method)
335
+ fs2 = _resolve_fs(path2, self.method)
336
+
337
+ if fs is fs2:
338
+ if fs.async_impl:
339
+ return await fs._copy(path1, path2, **kwargs)
340
+ else:
341
+ return fs.copy(path1, path2, **kwargs)
342
+
343
+ await copy_file_op(
344
+ fs, path1, fs2, path2, tempdir, batch_size, on_error=on_error
345
+ )
346
+
347
+
348
+ async def copy_file_op(
349
+ fs1, url1, fs2, url2, tempdir=None, batch_size=20, on_error="ignore"
350
+ ):
351
+ import tempfile
352
+
353
+ tempdir = tempdir or tempfile.mkdtemp()
354
+ try:
355
+ coros = [
356
+ _copy_file_op(
357
+ fs1,
358
+ u1,
359
+ fs2,
360
+ u2,
361
+ os.path.join(tempdir, uuid.uuid4().hex),
362
+ )
363
+ for u1, u2 in zip(url1, url2)
364
+ ]
365
+ out = await _run_coros_in_chunks(
366
+ coros, batch_size=batch_size, return_exceptions=True
367
+ )
368
+ finally:
369
+ shutil.rmtree(tempdir)
370
+ if on_error == "return":
371
+ return out
372
+ elif on_error == "raise":
373
+ for o in out:
374
+ if isinstance(o, Exception):
375
+ raise o
376
+
377
+
378
+ async def _copy_file_op(fs1, url1, fs2, url2, local, on_error="ignore"):
379
+ if fs1.async_impl:
380
+ await fs1._get_file(url1, local)
381
+ else:
382
+ fs1.get_file(url1, local)
383
+ if fs2.async_impl:
384
+ await fs2._put_file(local, url2)
385
+ else:
386
+ fs2.put_file(local, url2)
387
+ os.unlink(local)
388
+ logger.debug("Copy %s -> %s; done", url1, url2)
389
+
390
+
391
+ async def maybe_await(cor):
392
+ if inspect.iscoroutine(cor):
393
+ return await cor
394
+ else:
395
+ return cor
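
``GenericFileSystem`` dispatches every call to whichever backend the URL's protocol names, which is also what lets the (experimental) ``rsync`` helper copy trees between unrelated filesystems. A small sketch of the dispatch behaviour using only the memory backend; the paths are illustrative:

import fsspec
from fsspec.generic import GenericFileSystem

mem = fsspec.filesystem("memory")
mem.makedirs("/tree", exist_ok=True)
mem.pipe("/tree/a.txt", b"one")

gfs = GenericFileSystem()  # default_method="default": one plain instance per protocol
print(gfs.cat_file("memory://tree/a.txt"))    # b'one'
print(gfs.ls("memory://tree", detail=False))  # ['memory:///tree/a.txt']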
venv/lib/python3.12/site-packages/fsspec/gui.py ADDED
@@ -0,0 +1,416 @@
1
+ import ast
2
+ import contextlib
3
+ import logging
4
+ import os
5
+ import re
6
+ from typing import ClassVar, Sequence
7
+
8
+ import panel as pn
9
+
10
+ from .core import OpenFile, get_filesystem_class, split_protocol
11
+ from .registry import known_implementations
12
+
13
+ pn.extension()
14
+ logger = logging.getLogger("fsspec.gui")
15
+
16
+
17
+ class SigSlot:
18
+ """Signal-slot mixin, for Panel event passing
19
+
20
+ Include this class in a widget manager's superclasses to be able to
21
+ register events and callbacks on Panel widgets managed by that class.
22
+
23
+ The method ``_register`` should be called as widgets are added, and external
24
+ code should call ``connect`` to associate callbacks.
25
+
26
+ By default, all signals emit a DEBUG logging statement.
27
+ """
28
+
29
+ # names of signals that this class may emit each of which must be
30
+ # set by _register for any new instance
31
+ signals: ClassVar[Sequence[str]] = []
32
+ # names of actions that this class may respond to
33
+ slots: ClassVar[Sequence[str]] = []
34
+
35
+ # each of which must be a method name
36
+
37
+ def __init__(self):
38
+ self._ignoring_events = False
39
+ self._sigs = {}
40
+ self._map = {}
41
+ self._setup()
42
+
43
+ def _setup(self):
44
+ """Create GUI elements and register signals"""
45
+ self.panel = pn.pane.PaneBase()
46
+ # no signals to set up in the base class
47
+
48
+ def _register(
49
+ self, widget, name, thing="value", log_level=logging.DEBUG, auto=False
50
+ ):
51
+ """Watch the given attribute of a widget and assign it a named event
52
+
53
+ This is normally called at the time a widget is instantiated, in the
54
+ class which owns it.
55
+
56
+ Parameters
57
+ ----------
58
+ widget : pn.layout.Panel or None
59
+ Widget to watch. If None, an anonymous signal not associated with
60
+ any widget.
61
+ name : str
62
+ Name of this event
63
+ thing : str
64
+ Attribute of the given widget to watch
65
+ log_level : int
66
+ When the signal is triggered, a logging event of the given level
67
+ will be fired in the dfviz logger.
68
+ auto : bool
69
+ If True, automatically connects with a method in this class of the
70
+ same name.
71
+ """
72
+ if name not in self.signals:
73
+ raise ValueError(f"Attempt to assign an undeclared signal: {name}")
74
+ self._sigs[name] = {
75
+ "widget": widget,
76
+ "callbacks": [],
77
+ "thing": thing,
78
+ "log": log_level,
79
+ }
80
+ wn = "-".join(
81
+ [
82
+ getattr(widget, "name", str(widget)) if widget is not None else "none",
83
+ thing,
84
+ ]
85
+ )
86
+ self._map[wn] = name
87
+ if widget is not None:
88
+ widget.param.watch(self._signal, thing, onlychanged=True)
89
+ if auto and hasattr(self, name):
90
+ self.connect(name, getattr(self, name))
91
+
92
+ def _repr_mimebundle_(self, *args, **kwargs):
93
+ """Display in a notebook or a server"""
94
+ try:
95
+ return self.panel._repr_mimebundle_(*args, **kwargs)
96
+ except (ValueError, AttributeError) as exc:
97
+ raise NotImplementedError(
98
+ "Panel does not seem to be set up properly"
99
+ ) from exc
100
+
101
+ def connect(self, signal, slot):
102
+ """Associate a callback with the given event
103
+
104
+ The callback must be a function which takes the "new" value of the
105
+ watched attribute as the only parameter. If the callback returns False,
106
+ this cancels any further processing of the given event.
107
+
108
+ Alternatively, the callback can be a string, in which case it means
109
+ emitting the correspondingly-named event (i.e., connect to self)
110
+ """
111
+ self._sigs[signal]["callbacks"].append(slot)
112
+
113
+ def _signal(self, event):
114
+ """This is called by an action on a widget
115
+
116
+ Within a self.ignore_events context, nothing happens.
117
+
118
+ Tests can execute this method by directly changing the values of
119
+ widget components.
120
+ """
121
+ if not self._ignoring_events:
122
+ wn = "-".join([event.obj.name, event.name])
123
+ if wn in self._map and self._map[wn] in self._sigs:
124
+ self._emit(self._map[wn], event.new)
125
+
126
+ @contextlib.contextmanager
127
+ def ignore_events(self):
128
+ """Temporarily turn off events processing in this instance
129
+
130
+ (does not propagate to children)
131
+ """
132
+ self._ignoring_events = True
133
+ try:
134
+ yield
135
+ finally:
136
+ self._ignoring_events = False
137
+
138
+ def _emit(self, sig, value=None):
139
+ """An event happened, call its callbacks
140
+
141
+ This method can be used in tests to simulate message passing without
142
+ directly changing visual elements.
143
+
144
+ Calling of callbacks will halt whenever one returns False.
145
+ """
146
+ logger.log(self._sigs[sig]["log"], f"{sig}: {value}")
147
+ for callback in self._sigs[sig]["callbacks"]:
148
+ if isinstance(callback, str):
149
+ self._emit(callback)
150
+ else:
151
+ try:
152
+ # running callbacks should not break the interface
153
+ ret = callback(value)
154
+ if ret is False:
155
+ break
156
+ except Exception as e:
157
+ logger.exception(
158
+ "Exception (%s) while executing callback for signal: %s",
159
+ e,
160
+ sig,
161
+ )
162
+
163
+ def show(self, threads=False):
164
+ """Open a new browser tab and display this instance's interface"""
165
+ self.panel.show(threads=threads, verbose=False)
166
+ return self
167
+
168
+
169
+ class SingleSelect(SigSlot):
170
+ """A multiselect which only allows you to select one item for an event"""
171
+
172
+ signals = ["_selected", "selected"] # the first is internal
173
+ slots = ["set_options", "set_selection", "add", "clear", "select"]
174
+
175
+ def __init__(self, **kwargs):
176
+ self.kwargs = kwargs
177
+ super().__init__()
178
+
179
+ def _setup(self):
180
+ self.panel = pn.widgets.MultiSelect(**self.kwargs)
181
+ self._register(self.panel, "_selected", "value")
182
+ self._register(None, "selected")
183
+ self.connect("_selected", self.select_one)
184
+
185
+ def _signal(self, *args, **kwargs):
186
+ super()._signal(*args, **kwargs)
187
+
188
+ def select_one(self, *_):
189
+ with self.ignore_events():
190
+ val = [self.panel.value[-1]] if self.panel.value else []
191
+ self.panel.value = val
192
+ self._emit("selected", self.panel.value)
193
+
194
+ def set_options(self, options):
195
+ self.panel.options = options
196
+
197
+ def clear(self):
198
+ self.panel.options = []
199
+
200
+ @property
201
+ def value(self):
202
+ return self.panel.value
203
+
204
+ def set_selection(self, selection):
205
+ self.panel.value = [selection]
206
+
207
+
208
+ class FileSelector(SigSlot):
209
+ """Panel-based graphical file selector widget
210
+
211
+ Instances of this widget are interactive and can be displayed in jupyter by having
212
+ them as the output of a cell, or in a separate browser tab using ``.show()``.
213
+ """
214
+
215
+ signals = [
216
+ "protocol_changed",
217
+ "selection_changed",
218
+ "directory_entered",
219
+ "home_clicked",
220
+ "up_clicked",
221
+ "go_clicked",
222
+ "filters_changed",
223
+ ]
224
+ slots = ["set_filters", "go_home"]
225
+
226
+ def __init__(self, url=None, filters=None, ignore=None, kwargs=None):
227
+ """
228
+
229
+ Parameters
230
+ ----------
231
+ url : str (optional)
232
+ Initial value of the URL to populate the dialog; should include protocol
233
+ filters : list(str) (optional)
234
+ File endings to include in the listings. If not included, all files are
235
+ allowed. Does not affect directories.
236
+ If given, the endings will appear as checkboxes in the interface
237
+ ignore : list(str) (optional)
238
+ Regex(s) of file basename patterns to ignore, e.g., "\\." for typical
239
+ hidden files on posix
240
+ kwargs : dict (optional)
241
+ To pass to file system instance
242
+ """
243
+ if url:
244
+ self.init_protocol, url = split_protocol(url)
245
+ else:
246
+ self.init_protocol, url = "file", os.getcwd()
247
+ self.init_url = url
248
+ self.init_kwargs = (kwargs if isinstance(kwargs, str) else str(kwargs)) or "{}"
249
+ self.filters = filters
250
+ self.ignore = [re.compile(i) for i in ignore or []]
251
+ self._fs = None
252
+ super().__init__()
253
+
254
+ def _setup(self):
255
+ self.url = pn.widgets.TextInput(
256
+ name="url",
257
+ value=self.init_url,
258
+ align="end",
259
+ sizing_mode="stretch_width",
260
+ width_policy="max",
261
+ )
262
+ self.protocol = pn.widgets.Select(
263
+ options=sorted(known_implementations),
264
+ value=self.init_protocol,
265
+ name="protocol",
266
+ align="center",
267
+ )
268
+ self.kwargs = pn.widgets.TextInput(
269
+ name="kwargs", value=self.init_kwargs, align="center"
270
+ )
271
+ self.go = pn.widgets.Button(name="⇨", align="end", width=45)
272
+ self.main = SingleSelect(size=10)
273
+ self.home = pn.widgets.Button(name="🏠", width=40, height=30, align="end")
274
+ self.up = pn.widgets.Button(name="‹", width=30, height=30, align="end")
275
+
276
+ self._register(self.protocol, "protocol_changed", auto=True)
277
+ self._register(self.go, "go_clicked", "clicks", auto=True)
278
+ self._register(self.up, "up_clicked", "clicks", auto=True)
279
+ self._register(self.home, "home_clicked", "clicks", auto=True)
280
+ self._register(None, "selection_changed")
281
+ self.main.connect("selected", self.selection_changed)
282
+ self._register(None, "directory_entered")
283
+ self.prev_protocol = self.protocol.value
284
+ self.prev_kwargs = self.storage_options
285
+
286
+ self.filter_sel = pn.widgets.CheckBoxGroup(
287
+ value=[], options=[], inline=False, align="end", width_policy="min"
288
+ )
289
+ self._register(self.filter_sel, "filters_changed", auto=True)
290
+
291
+ self.panel = pn.Column(
292
+ pn.Row(self.protocol, self.kwargs),
293
+ pn.Row(self.home, self.up, self.url, self.go, self.filter_sel),
294
+ self.main.panel,
295
+ )
296
+ self.set_filters(self.filters)
297
+ self.go_clicked()
298
+
299
+ def set_filters(self, filters=None):
300
+ self.filters = filters
301
+ if filters:
302
+ self.filter_sel.options = filters
303
+ self.filter_sel.value = filters
304
+ else:
305
+ self.filter_sel.options = []
306
+ self.filter_sel.value = []
307
+
308
+ @property
309
+ def storage_options(self):
310
+ """Value of the kwargs box as a dictionary"""
311
+ return ast.literal_eval(self.kwargs.value) or {}
312
+
313
+ @property
314
+ def fs(self):
315
+ """Current filesystem instance"""
316
+ if self._fs is None:
317
+ cls = get_filesystem_class(self.protocol.value)
318
+ self._fs = cls(**self.storage_options)
319
+ return self._fs
320
+
321
+ @property
322
+ def urlpath(self):
323
+ """URL of currently selected item"""
324
+ return (
325
+ (f"{self.protocol.value}://{self.main.value[0]}")
326
+ if self.main.value
327
+ else None
328
+ )
329
+
330
+ def open_file(self, mode="rb", compression=None, encoding=None):
331
+ """Create OpenFile instance for the currently selected item
332
+
333
+ For example, in a notebook you might do something like
334
+
335
+ .. code-block::
336
+
337
+ [ ]: sel = FileSelector(); sel
338
+
339
+ # user selects their file
340
+
341
+ [ ]: with sel.open_file('rb') as f:
342
+ ... out = f.read()
343
+
344
+ Parameters
345
+ ----------
346
+ mode: str (optional)
347
+ Open mode for the file.
348
+ compression: str (optional)
349
+ If given, interact with the file as compressed. Set to 'infer' to guess
350
+ compression from the file ending
351
+ encoding: str (optional)
352
+ If using text mode, use this encoding; defaults to UTF-8.
353
+ """
354
+ if self.urlpath is None:
355
+ raise ValueError("No file selected")
356
+ return OpenFile(self.fs, self.urlpath, mode, compression, encoding)
357
+
358
+ def filters_changed(self, values):
359
+ self.filters = values
360
+ self.go_clicked()
361
+
362
+ def selection_changed(self, *_):
363
+ if self.urlpath is None:
364
+ return
365
+ if self.fs.isdir(self.urlpath):
366
+ self.url.value = self.fs._strip_protocol(self.urlpath)
367
+ self.go_clicked()
368
+
369
+ def go_clicked(self, *_):
370
+ if (
371
+ self.prev_protocol != self.protocol.value
372
+ or self.prev_kwargs != self.storage_options
373
+ ):
374
+ self._fs = None # causes fs to be recreated
375
+ self.prev_protocol = self.protocol.value
376
+ self.prev_kwargs = self.storage_options
377
+ listing = sorted(
378
+ self.fs.ls(self.url.value, detail=True), key=lambda x: x["name"]
379
+ )
380
+ listing = [
381
+ l
382
+ for l in listing
383
+ if not any(i.match(l["name"].rsplit("/", 1)[-1]) for i in self.ignore)
384
+ ]
385
+ folders = {
386
+ "📁 " + o["name"].rsplit("/", 1)[-1]: o["name"]
387
+ for o in listing
388
+ if o["type"] == "directory"
389
+ }
390
+ files = {
391
+ "📄 " + o["name"].rsplit("/", 1)[-1]: o["name"]
392
+ for o in listing
393
+ if o["type"] == "file"
394
+ }
395
+ if self.filters:
396
+ files = {
397
+ k: v
398
+ for k, v in files.items()
399
+ if any(v.endswith(ext) for ext in self.filters)
400
+ }
401
+ self.main.set_options(dict(**folders, **files))
402
+
403
+ def protocol_changed(self, *_):
404
+ self._fs = None
405
+ self.main.options = []
406
+ self.url.value = ""
407
+
408
+ def home_clicked(self, *_):
409
+ self.protocol.value = self.init_protocol
410
+ self.kwargs.value = self.init_kwargs
411
+ self.url.value = self.init_url
412
+ self.go_clicked()
413
+
414
+ def up_clicked(self, *_):
415
+ self.url.value = self.fs._parent(self.url.value)
416
+ self.go_clicked()
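
Taken together, a typical interaction with the ``FileSelector`` added above might look like the following sketch. It is based only on the docstrings in this diff and assumes ``panel`` is installed and a notebook or browser context is available; the starting URL is a hypothetical example.

from fsspec.gui import FileSelector

sel = FileSelector(url="file:///tmp")  # hypothetical starting location, protocol included
sel.show()  # open the widget in a browser tab; in a notebook, displaying `sel` also works

# ... the user navigates and selects a file in the rendered panel ...

print(sel.urlpath)  # protocol-qualified URL of the current selection, or None
with sel.open_file("rb") as f:  # raises ValueError if nothing is selected
    head = f.read(100)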