Add files using upload-large-folder tool
This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
- .gitattributes +33 -0
- 17.BangBros.Halloween Special With A Threesome.mih16199.mp4 +3 -0
- 17.BangBros.Stalking Pussy.bbe16045.mp4 +3 -0
- 17.Clips4Sale.Cory's Super Heroine Adventures.Super Spy vs Super Villain.mp4 +3 -0
- 17.DevilsFilm.Mommy Likes To Watch 2.mp4 +3 -0
- 17.DigitalPlayground.When College Girls Attack.mp4 +3 -0
- 17.EvilAngel.Porn Pro Drills 20YO Rim Jobber Kenzie.mp4 +3 -0
- 17.GirlsWay.Biology Exam.mp4 +3 -0
- 17.LoveHerFeet.Multiplayer.mp4 +3 -0
- 17.Mofos.Spinner Sucks Cock for Fame.mp4 +3 -0
- 17.NewSensations.Sexy Little Sister Kenzie Wants To Do It.mp4 +3 -0
- 18.Brazzers.Selfies With The Dean.mp4 +3 -0
- 18.BurningAngel.Human Pinata.mp4 +3 -0
- 18.CherryPimps.Wild Girls Kenzie Reeves And Vina Sky LIVE.mp4 +3 -0
- 18.Down The Hatch 29.mp4 +3 -0
- 18.GirlsWay.I Love You Both.mp4 +3 -0
- 18.GirlsWay.I Love You Too.mp4 +3 -0
- 18.GirlsWay.Peer Pressure - The Sharing Sisters.mp4 +3 -0
- 18.LesbianX.Anal Gaping Slumber Party.mp4 +3 -0
- 18.MissaX.In Love With Daddy.mp4 +3 -0
- 18.Mofos.A Sneaky Threesome Situation.mp4 +3 -0
- 18.Mofos.Teen Tastes Horny Stepmom's Pussy.mp4 +3 -0
- 18.Nubiles-Porn.Spring Break Lake Powell 1.mp4 +3 -0
- 18.PervCity.Tiny Blonde Teen Kenzie Reeves Has A Big Orgasm.mp4 +3 -0
- 18.PornFidelity.Losing It.mp4 +3 -0
- 18.PropertySex.VixenX.Been a While.mp4 +3 -0
- 18.PureTaboo.Trailer Park Taboo 3.mp4 +3 -0
- 18.RealityKings.Social Proof.mp4 +3 -0
- 19.Analized.Is The Ultimate Teenage Anal Slut.mp4 +3 -0
- 19.ArchangelVideo.My Pussy Is Dripping.mp4 +3 -0
- 19.BlowPass.Kenzie Loves Cock.mp4 +3 -0
- 19.Deeper.Dare You.101231.mp4 +3 -0
- 19.DogFartNetwork.BlackMeatWhiteFeet.And Big Black Cock.mp4 +3 -0
- 19.ElegantAngel.It'S A Family Thing 3.mp4 +3 -0
- venv/lib/python3.12/site-packages/filelock-3.18.0.dist-info/licenses/LICENSE +24 -0
- venv/lib/python3.12/site-packages/fsspec/__init__.py +71 -0
- venv/lib/python3.12/site-packages/fsspec/_version.py +21 -0
- venv/lib/python3.12/site-packages/fsspec/archive.py +75 -0
- venv/lib/python3.12/site-packages/fsspec/asyn.py +1110 -0
- venv/lib/python3.12/site-packages/fsspec/caching.py +1005 -0
- venv/lib/python3.12/site-packages/fsspec/callbacks.py +324 -0
- venv/lib/python3.12/site-packages/fsspec/compression.py +175 -0
- venv/lib/python3.12/site-packages/fsspec/config.py +131 -0
- venv/lib/python3.12/site-packages/fsspec/conftest.py +55 -0
- venv/lib/python3.12/site-packages/fsspec/core.py +743 -0
- venv/lib/python3.12/site-packages/fsspec/dircache.py +98 -0
- venv/lib/python3.12/site-packages/fsspec/exceptions.py +18 -0
- venv/lib/python3.12/site-packages/fsspec/fuse.py +324 -0
- venv/lib/python3.12/site-packages/fsspec/generic.py +395 -0
- venv/lib/python3.12/site-packages/fsspec/gui.py +416 -0
.gitattributes
CHANGED
@@ -103,3 +103,36 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 18.Clips4Sale.Primal's[[:space:]]Taboo[[:space:]]Sex.The[[:space:]]Sister[[:space:]]That[[:space:]]Got[[:space:]]What[[:space:]]She[[:space:]]Wanted.mp4 filter=lfs diff=lfs merge=lfs -text
 18.POVD.Thanksgiving[[:space:]]Creampie.mp4 filter=lfs diff=lfs merge=lfs -text
 18.NewSensations.Kenzie[[:space:]]Waits[[:space:]]For[[:space:]]Daddy[[:space:]]To[[:space:]]Cum.mp4 filter=lfs diff=lfs merge=lfs -text
+18.Nubiles-Porn.Spring[[:space:]]Break[[:space:]]Lake[[:space:]]Powell[[:space:]]1.mp4 filter=lfs diff=lfs merge=lfs -text
+18.Down[[:space:]]The[[:space:]]Hatch[[:space:]]29.mp4 filter=lfs diff=lfs merge=lfs -text
+19.ElegantAngel.It'S[[:space:]]A[[:space:]]Family[[:space:]]Thing[[:space:]]3.mp4 filter=lfs diff=lfs merge=lfs -text
+19.BlowPass.Kenzie[[:space:]]Loves[[:space:]]Cock.mp4 filter=lfs diff=lfs merge=lfs -text
+18.MissaX.In[[:space:]]Love[[:space:]]With[[:space:]]Daddy.mp4 filter=lfs diff=lfs merge=lfs -text
+18.CherryPimps.Wild[[:space:]]Girls[[:space:]]Kenzie[[:space:]]Reeves[[:space:]]And[[:space:]]Vina[[:space:]]Sky[[:space:]]LIVE.mp4 filter=lfs diff=lfs merge=lfs -text
+17.EvilAngel.Porn[[:space:]]Pro[[:space:]]Drills[[:space:]]20YO[[:space:]]Rim[[:space:]]Jobber[[:space:]]Kenzie.mp4 filter=lfs diff=lfs merge=lfs -text
+18.GirlsWay.Peer[[:space:]]Pressure[[:space:]]-[[:space:]]The[[:space:]]Sharing[[:space:]]Sisters.mp4 filter=lfs diff=lfs merge=lfs -text
+19.ArchangelVideo.My[[:space:]]Pussy[[:space:]]Is[[:space:]]Dripping.mp4 filter=lfs diff=lfs merge=lfs -text
+18.Mofos.Teen[[:space:]]Tastes[[:space:]]Horny[[:space:]]Stepmom's[[:space:]]Pussy.mp4 filter=lfs diff=lfs merge=lfs -text
+19.Analized.Is[[:space:]]The[[:space:]]Ultimate[[:space:]]Teenage[[:space:]]Anal[[:space:]]Slut.mp4 filter=lfs diff=lfs merge=lfs -text
+17.GirlsWay.Biology[[:space:]]Exam.mp4 filter=lfs diff=lfs merge=lfs -text
+18.GirlsWay.I[[:space:]]Love[[:space:]]You[[:space:]]Too.mp4 filter=lfs diff=lfs merge=lfs -text
+18.Brazzers.Selfies[[:space:]]With[[:space:]]The[[:space:]]Dean.mp4 filter=lfs diff=lfs merge=lfs -text
+18.PornFidelity.Losing[[:space:]]It.mp4 filter=lfs diff=lfs merge=lfs -text
+18.PropertySex.VixenX.Been[[:space:]]a[[:space:]]While.mp4 filter=lfs diff=lfs merge=lfs -text
+18.RealityKings.Social[[:space:]]Proof.mp4 filter=lfs diff=lfs merge=lfs -text
+19.DogFartNetwork.BlackMeatWhiteFeet.And[[:space:]]Big[[:space:]]Black[[:space:]]Cock.mp4 filter=lfs diff=lfs merge=lfs -text
+17.NewSensations.Sexy[[:space:]]Little[[:space:]]Sister[[:space:]]Kenzie[[:space:]]Wants[[:space:]]To[[:space:]]Do[[:space:]]It.mp4 filter=lfs diff=lfs merge=lfs -text
+17.BangBros.Stalking[[:space:]]Pussy.bbe16045.mp4 filter=lfs diff=lfs merge=lfs -text
+18.BurningAngel.Human[[:space:]]Pinata.mp4 filter=lfs diff=lfs merge=lfs -text
+17.DigitalPlayground.When[[:space:]]College[[:space:]]Girls[[:space:]]Attack.mp4 filter=lfs diff=lfs merge=lfs -text
+17.BangBros.Halloween[[:space:]]Special[[:space:]]With[[:space:]]A[[:space:]]Threesome.mih16199.mp4 filter=lfs diff=lfs merge=lfs -text
+17.Clips4Sale.Cory's[[:space:]]Super[[:space:]]Heroine[[:space:]]Adventures.Super[[:space:]]Spy[[:space:]]vs[[:space:]]Super[[:space:]]Villain.mp4 filter=lfs diff=lfs merge=lfs -text
+17.DevilsFilm.Mommy[[:space:]]Likes[[:space:]]To[[:space:]]Watch[[:space:]]2.mp4 filter=lfs diff=lfs merge=lfs -text
+18.LesbianX.Anal[[:space:]]Gaping[[:space:]]Slumber[[:space:]]Party.mp4 filter=lfs diff=lfs merge=lfs -text
+17.LoveHerFeet.Multiplayer.mp4 filter=lfs diff=lfs merge=lfs -text
+18.PureTaboo.Trailer[[:space:]]Park[[:space:]]Taboo[[:space:]]3.mp4 filter=lfs diff=lfs merge=lfs -text
+18.PervCity.Tiny[[:space:]]Blonde[[:space:]]Teen[[:space:]]Kenzie[[:space:]]Reeves[[:space:]]Has[[:space:]]A[[:space:]]Big[[:space:]]Orgasm.mp4 filter=lfs diff=lfs merge=lfs -text
+18.GirlsWay.I[[:space:]]Love[[:space:]]You[[:space:]]Both.mp4 filter=lfs diff=lfs merge=lfs -text
+19.Deeper.Dare[[:space:]]You.101231.mp4 filter=lfs diff=lfs merge=lfs -text
+17.Mofos.Spinner[[:space:]]Sucks[[:space:]]Cock[[:space:]]for[[:space:]]Fame.mp4 filter=lfs diff=lfs merge=lfs -text
+18.Mofos.A[[:space:]]Sneaky[[:space:]]Threesome[[:space:]]Situation.mp4 filter=lfs diff=lfs merge=lfs -text
17.BangBros.Halloween Special With A Threesome.mih16199.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:884e879b96ec030cc8a14e1e7a32a9f65d39a85354ad2bb21d188ab04e278f5b
+size 2422708123
17.BangBros.Stalking Pussy.bbe16045.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f3ea68e1a0c05b5018c53f2a326f7157bac8c8090d065bac6cc2ef6cf0ba19a
+size 1934457211
17.Clips4Sale.Cory's Super Heroine Adventures.Super Spy vs Super Villain.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43034cc231bc539dc50693ec5870ac2257d0b6a10b6ae17599217010be5f6fb9
+size 870873298
17.DevilsFilm.Mommy Likes To Watch 2.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7e298a4ac6d03b2a47ae94ff40e28c23b63971025a6b3ceafe2589c1df0b07e
+size 648854235
17.DigitalPlayground.When College Girls Attack.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36f85fa2a830fafa0f9c8db488c6ab9747667053d3fef0c386e94775a0fbac54
+size 1377442258
17.EvilAngel.Porn Pro Drills 20YO Rim Jobber Kenzie.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8b436c5e022e8202b69e9979f447148453d79d42a678f83945b7d23bc351df7
+size 872243786
17.GirlsWay.Biology Exam.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c23a3b48e46e4d430fcb18bc8bf09eb89528b306ba3a6cf52b04755649feb210
+size 1592606077
17.LoveHerFeet.Multiplayer.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8613c7f6f11351f522a5bf70620e551c052c4792ce570657c66280b28c867411
+size 3093689738
17.Mofos.Spinner Sucks Cock for Fame.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6fc2687e483e90c4b6f4e4e54116c3c51d8e5191959ff42d58c195fabfff1231
+size 3241790517
17.NewSensations.Sexy Little Sister Kenzie Wants To Do It.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9bb7a31a09c770db451f771fc31bc2dfbe4a1a52a38732ba2d89af3173dd7ff
+size 3461941399
18.Brazzers.Selfies With The Dean.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c49d7ad91b9dde0e1f7ecbbee569b8097576f3cc2309920fcb881b5c39b44281
+size 1276373412
18.BurningAngel.Human Pinata.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a4885a26346a2c582ff867896d250cc98fe001bf4c45174a00173071256ecbf
+size 1038731154
18.CherryPimps.Wild Girls Kenzie Reeves And Vina Sky LIVE.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d1cb8e43eb18a4dfc7af97ff4f6a863d75b407021068ecf3fab86eab1913742
+size 2245370403
18.Down The Hatch 29.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e7d0fea1b35b4b04cef496a03c9f23dcb6c0e3c6565cc5f1d59994fb8169d4e
+size 392756777
18.GirlsWay.I Love You Both.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca0736f08f5d81d6c6e66c992593094c159d798435cbbd692a3c1eaa51113837
+size 1980038412
18.GirlsWay.I Love You Too.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:896db7f0a41b9c1d84d06aa92f5496bcc6f6e14a4e0376abeb340344f4e6c2d1
+size 2152267815
18.GirlsWay.Peer Pressure - The Sharing Sisters.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0547799d4e8c871884e338174dc002df3ec5946f54fe6353a1d07ffd1e17136
+size 2978343703
18.LesbianX.Anal Gaping Slumber Party.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33336790c8d0c09d9906bb51ae56f221d9225977573f3d592568a4acb6eb2b91
+size 1370767697
18.MissaX.In Love With Daddy.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7168383cafc414bfbf65c11d51a2cfbd1cedf81d187007c29987e440093968dd
+size 1567924147
18.Mofos.A Sneaky Threesome Situation.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9ac5c78b37130309d6611d6df4a3a682b9be19def52c46ba8c6affc88b49ced
+size 2553280872
18.Mofos.Teen Tastes Horny Stepmom's Pussy.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d1164cdc046dc7460372352ed5926b3aea7de4f672887f3b18a624004155b7b
+size 3301988676
18.Nubiles-Porn.Spring Break Lake Powell 1.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c4a91615484fd7a480576e77ce80ec4bea136be0a8b972951064b3c9cf34b1f
+size 1786930517
18.PervCity.Tiny Blonde Teen Kenzie Reeves Has A Big Orgasm.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:165f672238f896968ddddb4e96c28db32f373147b590fd8b3db67817b8371292
+size 944538607
18.PornFidelity.Losing It.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5e73aed9043d81b40c137869eeb68d81ab7a45d72df52b3809f4783a885eef5
+size 1017159752
18.PropertySex.VixenX.Been a While.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83995f5915320416fb2dea76cca22aa7b9c2cd53685bc8d7ddfc259a6c1097c1
+size 3520856768
18.PureTaboo.Trailer Park Taboo 3.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2d399deb76ce6f713f9a9f1af86701fc6777a5d628119a23666819e62dd7510
+size 1237153374
18.RealityKings.Social Proof.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:009864a80f69aa32706b2277dde75504ed0eb4dfb56a05721a4bbcb21f957f13
+size 3091314409
19.Analized.Is The Ultimate Teenage Anal Slut.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc240588b4f60a7542ef89fa2a55a85d91aeb6c0afcb7fed8c5e94965652aecc
+size 2621108271
19.ArchangelVideo.My Pussy Is Dripping.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8f9393e755d2618e2bc21c8617a0ab223ebb78f7dbf7c642d9cdd479b5670fe
+size 1916687999
19.BlowPass.Kenzie Loves Cock.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62c78684762704b653ed11810e6d9522fe196d95fe576049ebc42fd3dd5cad21
+size 2174866312
19.Deeper.Dare You.101231.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3564ff804379de6fda587031bdb9bf565f656fd08dec1b36b1081c12aafac10f
+size 3373955368
19.DogFartNetwork.BlackMeatWhiteFeet.And Big Black Cock.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21b54744365838fda8d13819af758b20a72cd105c8772aa3f169b17117bcf850
+size 3124523333
19.ElegantAngel.It'S A Family Thing 3.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10c8f94415bc763c56e16ca16862386c01aaa4fe597e4ed6a2b0a518594671bf
+size 604433859
venv/lib/python3.12/site-packages/filelock-3.18.0.dist-info/licenses/LICENSE
ADDED
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org>
venv/lib/python3.12/site-packages/fsspec/__init__.py
ADDED
@@ -0,0 +1,71 @@
+from . import caching
+from ._version import __version__  # noqa: F401
+from .callbacks import Callback
+from .compression import available_compressions
+from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs
+from .exceptions import FSTimeoutError
+from .mapping import FSMap, get_mapper
+from .registry import (
+    available_protocols,
+    filesystem,
+    get_filesystem_class,
+    register_implementation,
+    registry,
+)
+from .spec import AbstractFileSystem
+
+__all__ = [
+    "AbstractFileSystem",
+    "FSTimeoutError",
+    "FSMap",
+    "filesystem",
+    "register_implementation",
+    "get_filesystem_class",
+    "get_fs_token_paths",
+    "get_mapper",
+    "open",
+    "open_files",
+    "open_local",
+    "registry",
+    "caching",
+    "Callback",
+    "available_protocols",
+    "available_compressions",
+    "url_to_fs",
+]
+
+
+def process_entries():
+    try:
+        from importlib.metadata import entry_points
+    except ImportError:
+        return
+    if entry_points is not None:
+        try:
+            eps = entry_points()
+        except TypeError:
+            pass  # importlib-metadata < 0.8
+        else:
+            if hasattr(eps, "select"):  # Python 3.10+ / importlib_metadata >= 3.9.0
+                specs = eps.select(group="fsspec.specs")
+            else:
+                specs = eps.get("fsspec.specs", [])
+            registered_names = {}
+            for spec in specs:
+                err_msg = f"Unable to load filesystem from {spec}"
+                name = spec.name
+                if name in registered_names:
+                    continue
+                registered_names[name] = True
+                register_implementation(
+                    name,
+                    spec.value.replace(":", "."),
+                    errtxt=err_msg,
+                    # We take our implementations as the ones to overload with if
+                    # for some reason we encounter some, may be the same, already
+                    # registered
+                    clobber=True,
+                )
+
+
+process_entries()
venv/lib/python3.12/site-packages/fsspec/_version.py
ADDED
@@ -0,0 +1,21 @@
+# file generated by setuptools-scm
+# don't change, don't track in version control
+
+__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
+
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple
+    from typing import Union
+
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+else:
+    VERSION_TUPLE = object
+
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+
+__version__ = version = '2025.5.1'
+__version_tuple__ = version_tuple = (2025, 5, 1)
venv/lib/python3.12/site-packages/fsspec/archive.py
ADDED
@@ -0,0 +1,75 @@
+import operator
+
+from fsspec import AbstractFileSystem
+from fsspec.utils import tokenize
+
+
+class AbstractArchiveFileSystem(AbstractFileSystem):
+    """
+    A generic superclass for implementing Archive-based filesystems.
+
+    Currently, it is shared amongst
+    :class:`~fsspec.implementations.zip.ZipFileSystem`,
+    :class:`~fsspec.implementations.libarchive.LibArchiveFileSystem` and
+    :class:`~fsspec.implementations.tar.TarFileSystem`.
+    """
+
+    def __str__(self):
+        return f"<Archive-like object {type(self).__name__} at {id(self)}>"
+
+    __repr__ = __str__
+
+    def ukey(self, path):
+        return tokenize(path, self.fo, self.protocol)
+
+    def _all_dirnames(self, paths):
+        """Returns *all* directory names for each path in paths, including intermediate
+        ones.
+
+        Parameters
+        ----------
+        paths: Iterable of path strings
+        """
+        if len(paths) == 0:
+            return set()
+
+        dirnames = {self._parent(path) for path in paths} - {self.root_marker}
+        return dirnames | self._all_dirnames(dirnames)
+
+    def info(self, path, **kwargs):
+        self._get_dirs()
+        path = self._strip_protocol(path)
+        if path in {"", "/"} and self.dir_cache:
+            return {"name": "", "type": "directory", "size": 0}
+        if path in self.dir_cache:
+            return self.dir_cache[path]
+        elif path + "/" in self.dir_cache:
+            return self.dir_cache[path + "/"]
+        else:
+            raise FileNotFoundError(path)
+
+    def ls(self, path, detail=True, **kwargs):
+        self._get_dirs()
+        paths = {}
+        for p, f in self.dir_cache.items():
+            p = p.rstrip("/")
+            if "/" in p:
+                root = p.rsplit("/", 1)[0]
+            else:
+                root = ""
+            if root == path.rstrip("/"):
+                paths[p] = f
+            elif all(
+                (a == b)
+                for a, b in zip(path.split("/"), [""] + p.strip("/").split("/"))
+            ):
+                # root directory entry
+                ppath = p.rstrip("/").split("/", 1)[0]
+                if ppath not in paths:
+                    out = {"name": ppath, "size": 0, "type": "directory"}
+                    paths[ppath] = out
+        if detail:
+            out = sorted(paths.values(), key=operator.itemgetter("name"))
+            return out
+        else:
+            return sorted(paths)
venv/lib/python3.12/site-packages/fsspec/asyn.py
ADDED
@@ -0,0 +1,1110 @@
+import asyncio
+import asyncio.events
+import functools
+import inspect
+import io
+import numbers
+import os
+import re
+import threading
+from contextlib import contextmanager
+from glob import has_magic
+from typing import TYPE_CHECKING, Iterable
+
+from .callbacks import DEFAULT_CALLBACK
+from .exceptions import FSTimeoutError
+from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
+from .spec import AbstractBufferedFile, AbstractFileSystem
+from .utils import glob_translate, is_exception, other_paths
+
+private = re.compile("_[^_]")
+iothread = [None]  # dedicated fsspec IO thread
+loop = [None]  # global event loop for any non-async instance
+_lock = None  # global lock placeholder
+get_running_loop = asyncio.get_running_loop
+
+
+def get_lock():
+    """Allocate or return a threading lock.
+
+    The lock is allocated on first use to allow setting one lock per forked process.
+    """
+    global _lock
+    if not _lock:
+        _lock = threading.Lock()
+    return _lock
+
+
+def reset_lock():
+    """Reset the global lock.
+
+    This should be called only on the init of a forked process to reset the lock to
+    None, enabling the new forked process to get a new lock.
+    """
+    global _lock
+
+    iothread[0] = None
+    loop[0] = None
+    _lock = None
+
+
+async def _runner(event, coro, result, timeout=None):
+    timeout = timeout if timeout else None  # convert 0 or 0.0 to None
+    if timeout is not None:
+        coro = asyncio.wait_for(coro, timeout=timeout)
+    try:
+        result[0] = await coro
+    except Exception as ex:
+        result[0] = ex
+    finally:
+        event.set()
+
+
+def sync(loop, func, *args, timeout=None, **kwargs):
+    """
+    Make loop run coroutine until it returns. Runs in other thread
+
+    Examples
+    --------
+    >>> fsspec.asyn.sync(fsspec.asyn.get_loop(), func, *args,
+                         timeout=timeout, **kwargs)
+    """
+    timeout = timeout if timeout else None  # convert 0 or 0.0 to None
+    # NB: if the loop is not running *yet*, it is OK to submit work
+    # and we will wait for it
+    if loop is None or loop.is_closed():
+        raise RuntimeError("Loop is not running")
+    try:
+        loop0 = asyncio.events.get_running_loop()
+        if loop0 is loop:
+            raise NotImplementedError("Calling sync() from within a running loop")
+    except NotImplementedError:
+        raise
+    except RuntimeError:
+        pass
+    coro = func(*args, **kwargs)
+    result = [None]
+    event = threading.Event()
+    asyncio.run_coroutine_threadsafe(_runner(event, coro, result, timeout), loop)
+    while True:
+        # this loops allows thread to get interrupted
+        if event.wait(1):
+            break
+        if timeout is not None:
+            timeout -= 1
+            if timeout < 0:
+                raise FSTimeoutError
+
+    return_result = result[0]
+    if isinstance(return_result, asyncio.TimeoutError):
+        # suppress asyncio.TimeoutError, raise FSTimeoutError
+        raise FSTimeoutError from return_result
+    elif isinstance(return_result, BaseException):
+        raise return_result
+    else:
+        return return_result
+
+
+def sync_wrapper(func, obj=None):
+    """Given a function, make so can be called in blocking contexts
+
+    Leave obj=None if defining within a class. Pass the instance if attaching
+    as an attribute of the instance.
+    """
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        self = obj or args[0]
+        return sync(self.loop, func, *args, **kwargs)
+
+    return wrapper
+
+
+@contextmanager
+def _selector_policy():
+    original_policy = asyncio.get_event_loop_policy()
+    try:
+        if os.name == "nt" and hasattr(asyncio, "WindowsSelectorEventLoopPolicy"):
+            asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
+
+        yield
+    finally:
+        asyncio.set_event_loop_policy(original_policy)
+
+
+def get_loop():
+    """Create or return the default fsspec IO loop
+
+    The loop will be running on a separate thread.
+    """
+    if loop[0] is None:
+        with get_lock():
+            # repeat the check just in case the loop got filled between the
+            # previous two calls from another thread
+            if loop[0] is None:
+                with _selector_policy():
+                    loop[0] = asyncio.new_event_loop()
+                th = threading.Thread(target=loop[0].run_forever, name="fsspecIO")
+                th.daemon = True
+                th.start()
+                iothread[0] = th
+    return loop[0]
+
+
+def reset_after_fork():
+    global lock
+    loop[0] = None
+    iothread[0] = None
+    lock = None
+
+
+if hasattr(os, "register_at_fork"):
+    # should be posix; this will do nothing for spawn or forkserver subprocesses
+    os.register_at_fork(after_in_child=reset_after_fork)
+
+
+if TYPE_CHECKING:
+    import resource
+
+    ResourceError = resource.error
+else:
+    try:
+        import resource
+    except ImportError:
+        resource = None
+        ResourceError = OSError
+    else:
+        ResourceError = getattr(resource, "error", OSError)
+
+_DEFAULT_BATCH_SIZE = 128
+_NOFILES_DEFAULT_BATCH_SIZE = 1280
+
+
+def _get_batch_size(nofiles=False):
+    from fsspec.config import conf
+
+    if nofiles:
+        if "nofiles_gather_batch_size" in conf:
+            return conf["nofiles_gather_batch_size"]
+    else:
+        if "gather_batch_size" in conf:
+            return conf["gather_batch_size"]
+    if nofiles:
+        return _NOFILES_DEFAULT_BATCH_SIZE
+    if resource is None:
+        return _DEFAULT_BATCH_SIZE
+
+    try:
+        soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
+    except (ImportError, ValueError, ResourceError):
+        return _DEFAULT_BATCH_SIZE
+
+    if soft_limit == resource.RLIM_INFINITY:
+        return -1
+    else:
+        return soft_limit // 8
+
+
+def running_async() -> bool:
+    """Being executed by an event loop?"""
+    try:
+        asyncio.get_running_loop()
+        return True
+    except RuntimeError:
+        return False
+
+
+async def _run_coros_in_chunks(
+    coros,
+    batch_size=None,
+    callback=DEFAULT_CALLBACK,
+    timeout=None,
+    return_exceptions=False,
+    nofiles=False,
+):
+    """Run the given coroutines in chunks.
+
+    Parameters
+    ----------
+    coros: list of coroutines to run
+    batch_size: int or None
+        Number of coroutines to submit/wait on simultaneously.
+        If -1, then it will not be any throttling. If
+        None, it will be inferred from _get_batch_size()
+    callback: fsspec.callbacks.Callback instance
+        Gets a relative_update when each coroutine completes
+    timeout: number or None
+        If given, each coroutine times out after this time. Note that, since
+        there are multiple batches, the total run time of this function will in
+        general be longer
+    return_exceptions: bool
+        Same meaning as in asyncio.gather
+    nofiles: bool
+        If inferring the batch_size, does this operation involve local files?
+        If yes, you normally expect smaller batches.
+    """
+
+    if batch_size is None:
+        batch_size = _get_batch_size(nofiles=nofiles)
+
+    if batch_size == -1:
+        batch_size = len(coros)
+
+    assert batch_size > 0
+
+    async def _run_coro(coro, i):
+        try:
+            return await asyncio.wait_for(coro, timeout=timeout), i
+        except Exception as e:
+            if not return_exceptions:
+                raise
+            return e, i
+        finally:
+            callback.relative_update(1)
+
+    i = 0
+    n = len(coros)
+    results = [None] * n
+    pending = set()
+
+    while pending or i < n:
+        while len(pending) < batch_size and i < n:
+            pending.add(asyncio.ensure_future(_run_coro(coros[i], i)))
+            i += 1
+
+        if not pending:
+            break
+
+        done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
+        while done:
+            result, k = await done.pop()
+            results[k] = result
+
+    return results
+
+
+# these methods should be implemented as async by any async-able backend
+async_methods = [
+    "_ls",
+    "_cat_file",
+    "_get_file",
+    "_put_file",
+    "_rm_file",
+    "_cp_file",
+    "_pipe_file",
+    "_expand_path",
+    "_info",
+    "_isfile",
+    "_isdir",
+    "_exists",
+    "_walk",
+    "_glob",
+    "_find",
+    "_du",
+    "_size",
+    "_mkdir",
+    "_makedirs",
+]
+
+
+class AsyncFileSystem(AbstractFileSystem):
+    """Async file operations, default implementations
+
+    Passes bulk operations to asyncio.gather for concurrent operation.
+
+    Implementations that have concurrent batch operations and/or async methods
+    should inherit from this class instead of AbstractFileSystem. Docstrings are
+    copied from the un-underscored method in AbstractFileSystem, if not given.
+    """
+
+    # note that methods do not have docstring here; they will be copied
+    # for _* methods and inferred for overridden methods.
+
+    async_impl = True
+    mirror_sync_methods = True
+    disable_throttling = False
+
+    def __init__(self, *args, asynchronous=False, loop=None, batch_size=None, **kwargs):
+        self.asynchronous = asynchronous
+        self._pid = os.getpid()
+        if not asynchronous:
+            self._loop = loop or get_loop()
+        else:
+            self._loop = None
+        self.batch_size = batch_size
+        super().__init__(*args, **kwargs)
+
+    @property
+    def loop(self):
+        if self._pid != os.getpid():
+            raise RuntimeError("This class is not fork-safe")
+        return self._loop
+
+    async def _rm_file(self, path, **kwargs):
+        raise NotImplementedError
+
+    async def _rm(self, path, recursive=False, batch_size=None, **kwargs):
+        # TODO: implement on_error
+        batch_size = batch_size or self.batch_size
+        path = await self._expand_path(path, recursive=recursive)
+        return await _run_coros_in_chunks(
+            [self._rm_file(p, **kwargs) for p in reversed(path)],
+            batch_size=batch_size,
+            nofiles=True,
+        )
+
+    async def _cp_file(self, path1, path2, **kwargs):
+        raise NotImplementedError
+
+    async def _mv_file(self, path1, path2):
+        await self._cp_file(path1, path2)
+        await self._rm_file(path1)
+
+    async def _copy(
+        self,
+        path1,
+        path2,
+        recursive=False,
+        on_error=None,
+        maxdepth=None,
+        batch_size=None,
+        **kwargs,
+    ):
+        if on_error is None and recursive:
+            on_error = "ignore"
+        elif on_error is None:
+            on_error = "raise"
+
+        if isinstance(path1, list) and isinstance(path2, list):
+            # No need to expand paths when both source and destination
+            # are provided as lists
+            paths1 = path1
+            paths2 = path2
+        else:
+            source_is_str = isinstance(path1, str)
+            paths1 = await self._expand_path(
+                path1, maxdepth=maxdepth, recursive=recursive
+            )
+            if source_is_str and (not recursive or maxdepth is not None):
+                # Non-recursive glob does not copy directories
+                paths1 = [
+                    p for p in paths1 if not (trailing_sep(p) or await self._isdir(p))
+                ]
+                if not paths1:
+                    return
+
+            source_is_file = len(paths1) == 1
+            dest_is_dir = isinstance(path2, str) and (
+                trailing_sep(path2) or await self._isdir(path2)
+            )
+
+            exists = source_is_str and (
+                (has_magic(path1) and source_is_file)
+                or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
+            )
+            paths2 = other_paths(
+                paths1,
+                path2,
+                exists=exists,
+                flatten=not source_is_str,
+            )
+
+        batch_size = batch_size or self.batch_size
+        coros = [self._cp_file(p1, p2, **kwargs) for p1, p2 in zip(paths1, paths2)]
+        result = await _run_coros_in_chunks(
+            coros, batch_size=batch_size, return_exceptions=True, nofiles=True
+        )
+
+        for ex in filter(is_exception, result):
+            if on_error == "ignore" and isinstance(ex, FileNotFoundError):
+                continue
+            raise ex
+
+    async def _pipe_file(self, path, value, mode="overwrite", **kwargs):
+        raise NotImplementedError
+
+    async def _pipe(self, path, value=None, batch_size=None, **kwargs):
+        if isinstance(path, str):
+            path = {path: value}
+        batch_size = batch_size or self.batch_size
+        return await _run_coros_in_chunks(
+            [self._pipe_file(k, v, **kwargs) for k, v in path.items()],
+            batch_size=batch_size,
+            nofiles=True,
+        )
+
+    async def _process_limits(self, url, start, end):
+        """Helper for "Range"-based _cat_file"""
+        size = None
+        suff = False
+        if start is not None and start < 0:
+            # if start is negative and end None, end is the "suffix length"
+            if end is None:
+                end = -start
+                start = ""
+                suff = True
+            else:
+                size = size or (await self._info(url))["size"]
+                start = size + start
+        elif start is None:
+            start = 0
+        if not suff:
+            if end is not None and end < 0:
+                if start is not None:
+                    size = size or (await self._info(url))["size"]
+                    end = size + end
+            elif end is None:
+                end = ""
+            if isinstance(end, numbers.Integral):
+                end -= 1  # bytes range is inclusive
+        return f"bytes={start}-{end}"
+
+    async def _cat_file(self, path, start=None, end=None, **kwargs):
+        raise NotImplementedError
+
+    async def _cat(
+        self, path, recursive=False, on_error="raise", batch_size=None, **kwargs
+    ):
+        paths = await self._expand_path(path, recursive=recursive)
+        coros = [self._cat_file(path, **kwargs) for path in paths]
+        batch_size = batch_size or self.batch_size
+        out = await _run_coros_in_chunks(
+            coros, batch_size=batch_size, nofiles=True, return_exceptions=True
+        )
+        if on_error == "raise":
+            ex = next(filter(is_exception, out), False)
+            if ex:
+                raise ex
+        if (
+            len(paths) > 1
+            or isinstance(path, list)
+            or paths[0] != self._strip_protocol(path)
+        ):
+            return {
+                k: v
+                for k, v in zip(paths, out)
+                if on_error != "omit" or not is_exception(v)
+            }
+        else:
+            return out[0]
+
+    async def _cat_ranges(
+        self,
+        paths,
+        starts,
+        ends,
+        max_gap=None,
+        batch_size=None,
+        on_error="return",
+        **kwargs,
+    ):
+        """Get the contents of byte ranges from one or more files
+
+        Parameters
+        ----------
+        paths: list
+            A list of of filepaths on this filesystems
+        starts, ends: int or list
+            Bytes limits of the read. If using a single int, the same value will be
+            used to read all the specified files.
+        """
+        # TODO: on_error
+        if max_gap is not None:
+            # use utils.merge_offset_ranges
+            raise NotImplementedError
+        if not isinstance(paths, list):
+            raise TypeError
+        if not isinstance(starts, Iterable):
+            starts = [starts] * len(paths)
+        if not isinstance(ends, Iterable):
+            ends = [ends] * len(paths)
+        if len(starts) != len(paths) or len(ends) != len(paths):
+            raise ValueError
+        coros = [
+            self._cat_file(p, start=s, end=e, **kwargs)
+            for p, s, e in zip(paths, starts, ends)
+        ]
+        batch_size = batch_size or self.batch_size
+        return await _run_coros_in_chunks(
+            coros, batch_size=batch_size, nofiles=True, return_exceptions=True
+        )
+
+    async def _put_file(self, lpath, rpath, mode="overwrite", **kwargs):
+        raise NotImplementedError
+
+    async def _put(
+        self,
+        lpath,
+        rpath,
+        recursive=False,
+        callback=DEFAULT_CALLBACK,
+        batch_size=None,
+        maxdepth=None,
+        **kwargs,
+    ):
+        """Copy file(s) from local.
+
+        Copies a specific file or tree of files (if recursive=True). If rpath
+        ends with a "/", it will be assumed to be a directory, and target files
+        will go within.
+
+        The put_file method will be called concurrently on a batch of files. The
+        batch_size option can configure the amount of futures that can be executed
+        at the same time. If it is -1, then all the files will be uploaded concurrently.
+        The default can be set for this instance by passing "batch_size" in the
+        constructor, or for all instances by setting the "gather_batch_size" key
+        in ``fsspec.config.conf``, falling back to 1/8th of the system limit .
+        """
+        if isinstance(lpath, list) and isinstance(rpath, list):
+            # No need to expand paths when both source and destination
+            # are provided as lists
+            rpaths = rpath
+            lpaths = lpath
+        else:
+            source_is_str = isinstance(lpath, str)
+            if source_is_str:
+                lpath = make_path_posix(lpath)
+            fs = LocalFileSystem()
+            lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
+            if source_is_str and (not recursive or maxdepth is not None):
+                # Non-recursive glob does not copy directories
+                lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
+                if not lpaths:
+                    return
+
+            source_is_file = len(lpaths) == 1
+            dest_is_dir = isinstance(rpath, str) and (
+                trailing_sep(rpath) or await self._isdir(rpath)
+            )
+
+            rpath = self._strip_protocol(rpath)
+            exists = source_is_str and (
+                (has_magic(lpath) and source_is_file)
+                or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
+            )
+            rpaths = other_paths(
+                lpaths,
+                rpath,
+                exists=exists,
+                flatten=not source_is_str,
+            )
+
+        is_dir = {l: os.path.isdir(l) for l in lpaths}
+        rdirs = [r for l, r in zip(lpaths, rpaths) if is_dir[l]]
+        file_pairs = [(l, r) for l, r in zip(lpaths, rpaths) if not is_dir[l]]
+
+        await asyncio.gather(*[self._makedirs(d, exist_ok=True) for d in rdirs])
+        batch_size = batch_size or self.batch_size
+
+        coros = []
+        callback.set_size(len(file_pairs))
+        for lfile, rfile in file_pairs:
+            put_file = callback.branch_coro(self._put_file)
+            coros.append(put_file(lfile, rfile, **kwargs))
+
+        return await _run_coros_in_chunks(
+            coros, batch_size=batch_size, callback=callback
+        )
+
+    async def _get_file(self, rpath, lpath, **kwargs):
+        raise NotImplementedError
+
+    async def _get(
+        self,
+        rpath,
+        lpath,
+        recursive=False,
+        callback=DEFAULT_CALLBACK,
+        maxdepth=None,
+        **kwargs,
+    ):
+        """Copy file(s) to local.
+
+        Copies a specific file or tree of files (if recursive=True). If lpath
+        ends with a "/", it will be assumed to be a directory, and target files
+        will go within. Can submit a list of paths, which may be glob-patterns
+        and will be expanded.
+
+        The get_file method will be called concurrently on a batch of files. The
+        batch_size option can configure the amount of futures that can be executed
+        at the same time. If it is -1, then all the files will be uploaded concurrently.
+        The default can be set for this instance by passing "batch_size" in the
+        constructor, or for all instances by setting the "gather_batch_size" key
+        in ``fsspec.config.conf``, falling back to 1/8th of the system limit .
+        """
+        if isinstance(lpath, list) and isinstance(rpath, list):
+            # No need to expand paths when both source and destination
+            # are provided as lists
+            rpaths = rpath
+            lpaths = lpath
+        else:
+            source_is_str = isinstance(rpath, str)
+            # First check for rpath trailing slash as _strip_protocol removes it.
+            source_not_trailing_sep = source_is_str and not trailing_sep(rpath)
+            rpath = self._strip_protocol(rpath)
+            rpaths = await self._expand_path(
+                rpath, recursive=recursive, maxdepth=maxdepth
+            )
+            if source_is_str and (not recursive or maxdepth is not None):
+                # Non-recursive glob does not copy directories
+                rpaths = [
+                    p for p in rpaths if not (trailing_sep(p) or await self._isdir(p))
+                ]
+                if not rpaths:
+                    return
+
+            lpath = make_path_posix(lpath)
+            source_is_file = len(rpaths) == 1
+            dest_is_dir = isinstance(lpath, str) and (
+                trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
+            )
+
+            exists = source_is_str and (
+                (has_magic(rpath) and source_is_file)
+                or (not has_magic(rpath) and dest_is_dir and source_not_trailing_sep)
+            )
+            lpaths = other_paths(
+                rpaths,
+                lpath,
+                exists=exists,
+                flatten=not source_is_str,
+            )
+
+        [os.makedirs(os.path.dirname(lp), exist_ok=True) for lp in lpaths]
+        batch_size = kwargs.pop("batch_size", self.batch_size)
+
+        coros = []
+        callback.set_size(len(lpaths))
+        for lpath, rpath in zip(lpaths, rpaths):
+            get_file = callback.branch_coro(self._get_file)
+            coros.append(get_file(rpath, lpath, **kwargs))
+        return await _run_coros_in_chunks(
+            coros, batch_size=batch_size, callback=callback
+        )
+
+    async def _isfile(self, path):
+        try:
+            return (await self._info(path))["type"] == "file"
+        except:  # noqa: E722
+            return False
+
+    async def _isdir(self, path):
+        try:
+            return (await self._info(path))["type"] == "directory"
+        except OSError:
+            return False
+
+    async def _size(self, path):
+        return (await self._info(path)).get("size", None)
+
+    async def _sizes(self, paths, batch_size=None):
+        batch_size = batch_size or self.batch_size
+        return await _run_coros_in_chunks(
+            [self._size(p) for p in paths], batch_size=batch_size
+        )
+
+    async def _exists(self, path, **kwargs):
+        try:
+            await self._info(path, **kwargs)
+            return True
+        except FileNotFoundError:
+            return False
+
+    async def _info(self, path, **kwargs):
+        raise NotImplementedError
+
+    async def _ls(self, path, detail=True, **kwargs):
+        raise NotImplementedError
+
+    async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs):
+        if maxdepth is not None and maxdepth < 1:
+            raise ValueError("maxdepth must be at least 1")
+
+        path = self._strip_protocol(path)
+        full_dirs = {}
+        dirs = {}
+        files = {}
+
+        detail = kwargs.pop("detail", False)
+        try:
+            listing = await self._ls(path, detail=True, **kwargs)
+        except (FileNotFoundError, OSError) as e:
+            if on_error == "raise":
+                raise
+            elif callable(on_error):
+                on_error(e)
+            if detail:
+                yield path, {}, {}
+            else:
+                yield path, [], []
+            return
+
+        for info in listing:
+            # each info name must be at least [path]/part , but here
+            # we check also for names like [path]/part/
+            pathname = info["name"].rstrip("/")
+            name = pathname.rsplit("/", 1)[-1]
+            if info["type"] == "directory" and pathname != path:
+                # do not include "self" path
+                full_dirs[name] = pathname
+                dirs[name] = info
+            elif pathname == path:
+                # file-like with same name as give path
+                files[""] = info
+            else:
+                files[name] = info
+
+        if detail:
+            yield path, dirs, files
+        else:
+            yield path, list(dirs), list(files)
+
+        if maxdepth is not None:
+            maxdepth -= 1
+            if maxdepth < 1:
+                return
+
+        for d in dirs:
+            async for _ in self._walk(
+                full_dirs[d], maxdepth=maxdepth, detail=detail, **kwargs
+            ):
+                yield _
+
+    async def _glob(self, path, maxdepth=None, **kwargs):
+        if maxdepth is not None and maxdepth < 1:
+            raise ValueError("maxdepth must be at least 1")
+
+        import re
+
+        seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
+        ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
+        path = self._strip_protocol(path)
+        append_slash_to_dirname = ends_with_sep or path.endswith(
+            tuple(sep + "**" for sep in seps)
+        )
+        idx_star = path.find("*") if path.find("*") >= 0 else len(path)
+        idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
+        idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
+
+        min_idx = min(idx_star, idx_qmark, idx_brace)
+
+        detail = kwargs.pop("detail", False)
+
+        if not has_magic(path):
+            if await self._exists(path, **kwargs):
+                if not detail:
+                    return [path]
+                else:
+                    return {path: await self._info(path, **kwargs)}
+            else:
+                if not detail:
+                    return []  # glob of non-existent returns empty
+                else:
+                    return {}
+        elif "/" in path[:min_idx]:
+            min_idx = path[:min_idx].rindex("/")
+            root = path[: min_idx + 1]
+            depth = path[min_idx + 1 :].count("/") + 1
+        else:
+            root = ""
+            depth = path[min_idx + 1 :].count("/") + 1
+
+        if "**" in path:
+            if maxdepth is not None:
+                idx_double_stars = path.find("**")
+                depth_double_stars = path[idx_double_stars:].count("/") + 1
+                depth = depth - depth_double_stars + maxdepth
+            else:
+                depth = None
+
+        allpaths = await self._find(
+            root, maxdepth=depth, withdirs=True, detail=True, **kwargs
+        )
+
+        pattern = glob_translate(path + ("/" if ends_with_sep else ""))
+        pattern = re.compile(pattern)
+
+        out = {
+            p: info
+            for p, info in sorted(allpaths.items())
+            if pattern.match(
+                p + "/"
+                if append_slash_to_dirname and info["type"] == "directory"
+                else p
+            )
+        }
+
+        if detail:
+            return out
+        else:
+            return list(out)
+
+    async def _du(self, path, total=True, maxdepth=None, **kwargs):
+        sizes = {}
+        # async for?
+        for f in await self._find(path, maxdepth=maxdepth, **kwargs):
+            info = await self._info(f)
+            sizes[info["name"]] = info["size"]
+        if total:
+            return sum(sizes.values())
+        else:
+            return sizes
+
+    async def _find(self, path, maxdepth=None, withdirs=False, **kwargs):
+        path = self._strip_protocol(path)
+        out = {}
+        detail = kwargs.pop("detail", False)
+
+        # Add the root directory if withdirs is requested
+        # This is needed for posix glob compliance
+        if withdirs and path != "" and await self._isdir(path):
+            out[path] = await self._info(path)
+
+        # async for?
+        async for _, dirs, files in self._walk(path, maxdepth, detail=True, **kwargs):
+            if withdirs:
+                files.update(dirs)
+            out.update({info["name"]: info for name, info in files.items()})
+        if not out and (await self._isfile(path)):
+            # walk works on directories, but find should also return [path]
+            # when path happens to be a file
+            out[path] = {}
+        names = sorted(out)
+        if not detail:
+            return names
+        else:
+            return {name: out[name] for name in names}
+
+    async def _expand_path(self, path, recursive=False, maxdepth=None):
+        if maxdepth is not None and maxdepth < 1:
+            raise ValueError("maxdepth must be at least 1")
+
+        if isinstance(path, str):
+            out = await self._expand_path([path], recursive, maxdepth)
+        else:
+            out = set()
+            path = [self._strip_protocol(p) for p in path]
+            for p in path:  # can gather here
+                if has_magic(p):
+                    bit = set(await self._glob(p, maxdepth=maxdepth))
+                    out |= bit
+                    if recursive:
+                        # glob call above expanded one depth so if maxdepth is defined
+                        # then decrement it in expand_path call below. If it is zero
+                        # after decrementing then avoid expand_path call.
+                        if maxdepth is not None and maxdepth <= 1:
+                            continue
+                        out |= set(
+                            await self._expand_path(
+                                list(bit),
+                                recursive=recursive,
+                                maxdepth=maxdepth - 1 if maxdepth is not None else None,
+                            )
+                        )
+                    continue
+                elif recursive:
+                    rec = set(await self._find(p, maxdepth=maxdepth, withdirs=True))
+                    out |= rec
+                if p not in out and (recursive is False or (await self._exists(p))):
+                    # should only check once, for the root
+                    out.add(p)
+        if not out:
+            raise FileNotFoundError(path)
|
913 |
+
return sorted(out)
|
914 |
+
|
915 |
+
async def _mkdir(self, path, create_parents=True, **kwargs):
|
916 |
+
pass # not necessary to implement, may not have directories
|
917 |
+
|
918 |
+
async def _makedirs(self, path, exist_ok=False):
|
919 |
+
pass # not necessary to implement, may not have directories
|
920 |
+
|
921 |
+
async def open_async(self, path, mode="rb", **kwargs):
|
922 |
+
if "b" not in mode or kwargs.get("compression"):
|
923 |
+
raise ValueError
|
924 |
+
raise NotImplementedError
|
925 |
+
|
926 |
+
|
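The coroutines above are essentially the whole surface a backend has to supply; walking, globbing and the synchronous method names are all derived. A minimal sketch of such a subclass, assuming nothing beyond what is defined above: `DictFileSystem`, its `data` mapping and the sample path are invented for illustration, not part of fsspec.

```python
from fsspec.asyn import AsyncFileSystem


class DictFileSystem(AsyncFileSystem):  # hypothetical example class
    protocol = "dictfs"
    cachable = False  # avoid instance caching for this toy example

    def __init__(self, data=None, **kwargs):
        super().__init__(**kwargs)
        self.data = data or {}  # path -> bytes

    async def _info(self, path, **kwargs):
        path = self._strip_protocol(path)
        if path in self.data:
            return {"name": path, "size": len(self.data[path]), "type": "file"}
        raise FileNotFoundError(path)

    async def _ls(self, path, detail=True, **kwargs):
        infos = [await self._info(p) for p in sorted(self.data)]
        return infos if detail else [i["name"] for i in infos]


fs = DictFileSystem(data={"a/b.txt": b"hello"})
# the sync name is generated from _ls by mirror_sync_methods, defined below
print(fs.ls("", detail=False))
```

Because the instance was created with the default `asynchronous=False`, the sync wrappers are attached at construction time and the class never defines `ls` itself.
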
def mirror_sync_methods(obj):
    """Populate sync and async methods for obj

    For each method will create a sync version if the name refers to an async method
    (coroutine) and there is no override in the child class; will create an async
    method for the corresponding sync method if there is no implementation.

    Uses the methods specified in
    - async_methods: the set that an implementation is expected to provide
    - default_async_methods: that can be derived from their sync version in
      AbstractFileSystem
    - AsyncFileSystem: async-specific default coroutines
    """
    from fsspec import AbstractFileSystem

    for method in async_methods + dir(AsyncFileSystem):
        if not method.startswith("_"):
            continue
        smethod = method[1:]
        if private.match(method):
            isco = inspect.iscoroutinefunction(getattr(obj, method, None))
            unsync = getattr(getattr(obj, smethod, False), "__func__", None)
            is_default = unsync is getattr(AbstractFileSystem, smethod, "")
            if isco and is_default:
                mth = sync_wrapper(getattr(obj, method), obj=obj)
                setattr(obj, smethod, mth)
                if not mth.__doc__:
                    mth.__doc__ = getattr(
                        getattr(AbstractFileSystem, smethod, None), "__doc__", ""
                    )


class FSSpecCoroutineCancel(Exception):
    pass


def _dump_running_tasks(
    printout=True, cancel=True, exc=FSSpecCoroutineCancel, with_task=False
):
    import traceback

    tasks = [t for t in asyncio.tasks.all_tasks(loop[0]) if not t.done()]
    if printout:
        [task.print_stack() for task in tasks]
    out = [
        {
            "locals": task._coro.cr_frame.f_locals,
            "file": task._coro.cr_frame.f_code.co_filename,
            "firstline": task._coro.cr_frame.f_code.co_firstlineno,
            "linelo": task._coro.cr_frame.f_lineno,
            "stack": traceback.format_stack(task._coro.cr_frame),
            "task": task if with_task else None,
        }
        for task in tasks
    ]
    if cancel:
        for t in tasks:
            cbs = t._callbacks
            t.cancel()
            asyncio.futures.Future.set_exception(t, exc)
            asyncio.futures.Future.cancel(t)
            [cb[0](t) for cb in cbs]  # cancels any dependent concurrent.futures
            try:
                t._coro.throw(exc)  # exits coro, unless explicitly handled
            except exc:
                pass
    return out


class AbstractAsyncStreamedFile(AbstractBufferedFile):
    # no read buffering, and always auto-commit
    # TODO: readahead might still be useful here, but needs async version

    async def read(self, length=-1):
        """
        Return data from cache, or fetch pieces as necessary

        Parameters
        ----------
        length: int (-1)
            Number of bytes to read; if <0, all remaining bytes.
        """
        length = -1 if length is None else int(length)
        if self.mode != "rb":
            raise ValueError("File not in read mode")
        if length < 0:
            length = self.size - self.loc
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if length == 0:
            # don't even bother calling fetch
            return b""
        out = await self._fetch_range(self.loc, self.loc + length)
        self.loc += len(out)
        return out

    async def write(self, data):
        """
        Write data to buffer.

        Buffer only sent on flush() or if buffer is greater than
        or equal to blocksize.

        Parameters
        ----------
        data: bytes
            Set of bytes to be written.
        """
        if self.mode not in {"wb", "ab"}:
            raise ValueError("File not in write mode")
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if self.forced:
            raise ValueError("This file has been force-flushed, can only close")
        out = self.buffer.write(data)
        self.loc += out
        if self.buffer.tell() >= self.blocksize:
            await self.flush()
        return out

    async def close(self):
        """Close file

        Finalizes writes, discards cache
        """
        if getattr(self, "_unclosable", False):
            return
        if self.closed:
            return
        if self.mode == "rb":
            self.cache = None
        else:
            if not self.forced:
                await self.flush(force=True)

            if self.fs is not None:
                self.fs.invalidate_cache(self.path)
                self.fs.invalidate_cache(self.fs._parent(self.path))

        self.closed = True

    async def flush(self, force=False):
        if self.closed:
            raise ValueError("Flush on closed file")
        if force and self.forced:
            raise ValueError("Force flush cannot be called more than once")
        if force:
            self.forced = True

        if self.mode not in {"wb", "ab"}:
            # no-op to flush on read-mode
            return

        if not force and self.buffer.tell() < self.blocksize:
            # Defer write on small block
            return

        if self.offset is None:
            # Initialize a multipart upload
            self.offset = 0
            try:
                await self._initiate_upload()
            except:
                self.closed = True
                raise

        if await self._upload_chunk(final=force) is not False:
            self.offset += self.buffer.seek(0, 2)
            self.buffer = io.BytesIO()

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    async def _fetch_range(self, start, end):
        raise NotImplementedError

    async def _initiate_upload(self):
        pass

    async def _upload_chunk(self, final=False):
        raise NotImplementedError
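A sketch of how the streamed file is meant to be consumed, not fsspec's own API surface: `stream_first_kb` and `fs` are placeholders, and `fs` must be an `AsyncFileSystem` whose backend actually implements `open_async` (the base coroutine above only raises).

```python
async def stream_first_kb(fs, path):
    # __aenter__/__aexit__ above make the streamed file an async context
    # manager; close() flushes pending writes or drops the read cache
    async with await fs.open_async(path, "rb") as f:
        return await f.read(1024)
```

Since reads are unbuffered, each `read` maps directly onto one `_fetch_range` call against the backend.
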
venv/lib/python3.12/site-packages/fsspec/caching.py
ADDED
@@ -0,0 +1,1005 @@
from __future__ import annotations

import collections
import functools
import logging
import math
import os
import threading
import warnings
from concurrent.futures import Future, ThreadPoolExecutor
from itertools import groupby
from operator import itemgetter
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Generic,
    NamedTuple,
    Optional,
    OrderedDict,
    TypeVar,
)

if TYPE_CHECKING:
    import mmap

    from typing_extensions import ParamSpec

    P = ParamSpec("P")
else:
    P = TypeVar("P")

T = TypeVar("T")


logger = logging.getLogger("fsspec")

Fetcher = Callable[[int, int], bytes]  # Maps (start, end) to bytes
MultiFetcher = Callable[[list[int, int]], bytes]  # Maps [(start, end)] to bytes


class BaseCache:
    """Pass-through cache: doesn't keep anything, calls every time

    Acts as base class for other cachers

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: func
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    """

    name: ClassVar[str] = "none"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        self.blocksize = blocksize
        self.nblocks = 0
        self.fetcher = fetcher
        self.size = size
        self.hit_count = 0
        self.miss_count = 0
        # the bytes that we actually requested
        self.total_requested_bytes = 0

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        if start is None:
            start = 0
        if stop is None:
            stop = self.size
        if start >= self.size or start >= stop:
            return b""
        return self.fetcher(start, stop)

    def _reset_stats(self) -> None:
        """Reset hit and miss counts for a more granular report e.g. by file."""
        self.hit_count = 0
        self.miss_count = 0
        self.total_requested_bytes = 0

    def _log_stats(self) -> str:
        """Return a formatted string of the cache statistics."""
        if self.hit_count == 0 and self.miss_count == 0:
            # a cache that does nothing, this is for logs only
            return ""
        return f" , {self.name}: {self.hit_count} hits, {self.miss_count} misses, {self.total_requested_bytes} total requested bytes"

    def __repr__(self) -> str:
        # TODO: use rich for better formatting
        return f"""
        <{self.__class__.__name__}:
            block size  :   {self.blocksize}
            block count :   {self.nblocks}
            file size   :   {self.size}
            cache hits  :   {self.hit_count}
            cache misses:   {self.miss_count}
            total requested bytes: {self.total_requested_bytes}>
        """
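A small sketch (not part of fsspec) of the `_fetch` contract that every subclass below extends: a fetcher is any `f(start, stop) -> bytes`, and the base class simply passes requests through while normalising bounds.

```python
from fsspec.caching import BaseCache

data = bytes(range(256))  # illustrative in-memory payload

def fetcher(start: int, stop: int) -> bytes:  # matches the Fetcher signature
    return data[start:stop]

c = BaseCache(blocksize=32, fetcher=fetcher, size=len(data))
assert c._fetch(0, 8) == data[:8]    # pass-through: every call hits the fetcher
assert c._fetch(300, 310) == b""     # out-of-range reads collapse to b""
assert c._fetch(None, None) == data  # None bounds default to (0, size)
```
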
class MMapCache(BaseCache):
    """memory-mapped sparse file cache

    Opens temporary file, which is filled blocks-wise when data is requested.
    Ensure there is enough disc space in the temporary location.

    This cache method might only work on posix

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: Fetcher
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    location: str
        Where to create the temporary file. If None, a temporary file is
        created using tempfile.TemporaryFile().
    blocks: set[int]
        Set of block numbers that have already been fetched. If None, an empty
        set is created.
    multi_fetcher: MultiFetcher
        Function of the form f([(start, end)]) which gets bytes from remote
        as specified. This function is used to fetch multiple blocks at once.
        If not specified, the fetcher function is used instead.
    """

    name = "mmap"

    def __init__(
        self,
        blocksize: int,
        fetcher: Fetcher,
        size: int,
        location: str | None = None,
        blocks: set[int] | None = None,
        multi_fetcher: MultiFetcher | None = None,
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.blocks = set() if blocks is None else blocks
        self.location = location
        self.multi_fetcher = multi_fetcher
        self.cache = self._makefile()

    def _makefile(self) -> mmap.mmap | bytearray:
        import mmap
        import tempfile

        if self.size == 0:
            return bytearray()

        # posix version
        if self.location is None or not os.path.exists(self.location):
            if self.location is None:
                fd = tempfile.TemporaryFile()
                self.blocks = set()
            else:
                fd = open(self.location, "wb+")
            fd.seek(self.size - 1)
            fd.write(b"1")
            fd.flush()
        else:
            fd = open(self.location, "r+b")

        return mmap.mmap(fd.fileno(), self.size)

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        logger.debug(f"MMap cache fetching {start}-{end}")
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        start_block = start // self.blocksize
        end_block = end // self.blocksize
        block_range = range(start_block, end_block + 1)
        # Determine which blocks need to be fetched. This sequence is sorted by construction.
        need = (i for i in block_range if i not in self.blocks)
        # Count the number of blocks already cached
        self.hit_count += sum(1 for i in block_range if i in self.blocks)

        ranges = []

        # Consolidate needed blocks.
        # Algorithm adapted from Python 2.x itertools documentation.
        # We are grouping an enumerated sequence of blocks. By comparing the difference
        # between an ascending range (provided by enumerate) and the needed block numbers
        # we can detect when the block number skips values. The key computes this difference.
        # Whenever the difference changes, we know that we have previously cached block(s),
        # and a new group is started. In other words, this algorithm neatly groups
        # runs of consecutive block numbers so they can be fetched together.
        for _, _blocks in groupby(enumerate(need), key=lambda x: x[0] - x[1]):
            # Extract the blocks from the enumerated sequence
            _blocks = tuple(map(itemgetter(1), _blocks))
            # Compute start of first block
            sstart = _blocks[0] * self.blocksize
            # Compute the end of the last block. Last block may not be full size.
            send = min(_blocks[-1] * self.blocksize + self.blocksize, self.size)

            # Fetch bytes (could be multiple consecutive blocks)
            self.total_requested_bytes += send - sstart
            logger.debug(
                f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})"
            )
            ranges.append((sstart, send))

            # Update set of cached blocks
            self.blocks.update(_blocks)
            # Update cache statistics with number of blocks we had to cache
            self.miss_count += len(_blocks)

        if not ranges:
            return self.cache[start:end]

        if self.multi_fetcher:
            logger.debug(f"MMap get blocks {ranges}")
            for idx, r in enumerate(self.multi_fetcher(ranges)):
                (sstart, send) = ranges[idx]
                logger.debug(f"MMap copy block ({sstart}-{send})")
                self.cache[sstart:send] = r
        else:
            for sstart, send in ranges:
                logger.debug(f"MMap get block ({sstart}-{send})")
                self.cache[sstart:send] = self.fetcher(sstart, send)

        return self.cache[start:end]

    def __getstate__(self) -> dict[str, Any]:
        state = self.__dict__.copy()
        # Remove the unpicklable entries.
        del state["cache"]
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        # Restore instance attributes
        self.__dict__.update(state)
        self.cache = self._makefile()
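The comment block in `_fetch` above describes the enumerate/groupby consolidation trick; a standalone sketch of just that idiom (the `need` values are invented for illustration): consecutive block numbers keep a constant difference from their enumeration index, so each group is exactly one contiguous run.

```python
from itertools import groupby
from operator import itemgetter

need = [2, 3, 4, 9, 10, 15]  # block numbers still missing from the cache
runs = [
    tuple(map(itemgetter(1), grp))
    for _, grp in groupby(enumerate(need), key=lambda x: x[0] - x[1])
]
print(runs)  # [(2, 3, 4), (9, 10), (15,)] -> three consolidated fetches
```
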
class ReadAheadCache(BaseCache):
    """Cache which reads only when we get beyond a block of data

    This is a much simpler version of BytesCache, and does not attempt to
    fill holes in the cache or keep fragments alive. It is best suited to
    many small reads in a sequential order (e.g., reading lines from a file).
    """

    name = "readahead"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        super().__init__(blocksize, fetcher, size)
        self.cache = b""
        self.start = 0
        self.end = 0

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        if start is None:
            start = 0
        if end is None or end > self.size:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        l = end - start
        if start >= self.start and end <= self.end:
            # cache hit
            self.hit_count += 1
            return self.cache[start - self.start : end - self.start]
        elif self.start <= start < self.end:
            # partial hit
            self.miss_count += 1
            part = self.cache[start - self.start :]
            l -= len(part)
            start = self.end
        else:
            # miss
            self.miss_count += 1
            part = b""
        end = min(self.size, end + self.blocksize)
        self.total_requested_bytes += end - start
        self.cache = self.fetcher(start, end)  # new block replaces old
        self.start = start
        self.end = self.start + len(self.cache)
        return part + self.cache[:l]


class FirstChunkCache(BaseCache):
    """Caches the first block of a file only

    This may be useful for file types where the metadata is stored in the header,
    but is randomly accessed.
    """

    name = "first"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        if blocksize > size:
            # this will buffer the whole thing
            blocksize = size
        super().__init__(blocksize, fetcher, size)
        self.cache: bytes | None = None

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        start = start or 0
        if start > self.size:
            logger.debug("FirstChunkCache: requested start > file size")
            return b""

        end = min(end, self.size)

        if start < self.blocksize:
            if self.cache is None:
                self.miss_count += 1
                if end > self.blocksize:
                    self.total_requested_bytes += end
                    data = self.fetcher(0, end)
                    self.cache = data[: self.blocksize]
                    return data[start:]
                self.cache = self.fetcher(0, self.blocksize)
                self.total_requested_bytes += self.blocksize
            part = self.cache[start:end]
            if end > self.blocksize:
                self.total_requested_bytes += end - self.blocksize
                part += self.fetcher(self.blocksize, end)
            self.hit_count += 1
            return part
        else:
            self.miss_count += 1
            self.total_requested_bytes += end - start
            return self.fetcher(start, end)


class BlockCache(BaseCache):
    """
    Cache holding memory as a set of blocks.

    Requests are only ever made ``blocksize`` at a time, and are
    stored in an LRU cache. The least recently accessed block is
    discarded when more than ``maxblocks`` are stored.

    Parameters
    ----------
    blocksize : int
        The number of bytes to store in each block.
        Requests are only ever made for ``blocksize``, so this
        should balance the overhead of making a request against
        the granularity of the blocks.
    fetcher : Callable
    size : int
        The total size of the file being cached.
    maxblocks : int
        The maximum number of blocks to cache for. The maximum memory
        use for this cache is then ``blocksize * maxblocks``.
    """

    name = "blockcache"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.nblocks = math.ceil(size / blocksize)
        self.maxblocks = maxblocks
        self._fetch_block_cached = functools.lru_cache(maxblocks)(self._fetch_block)

    def cache_info(self):
        """
        The statistics on the block cache.

        Returns
        -------
        NamedTuple
            Returned directly from the LRU Cache used internally.
        """
        return self._fetch_block_cached.cache_info()

    def __getstate__(self) -> dict[str, Any]:
        state = self.__dict__
        del state["_fetch_block_cached"]
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        self.__dict__.update(state)
        self._fetch_block_cached = functools.lru_cache(state["maxblocks"])(
            self._fetch_block
        )

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""

        # byte position -> block numbers
        start_block_number = start // self.blocksize
        end_block_number = end // self.blocksize

        # these are cached, so safe to do multiple calls for the same start and end.
        for block_number in range(start_block_number, end_block_number + 1):
            self._fetch_block_cached(block_number)

        return self._read_cache(
            start,
            end,
            start_block_number=start_block_number,
            end_block_number=end_block_number,
        )

    def _fetch_block(self, block_number: int) -> bytes:
        """
        Fetch the block of data for `block_number`.
        """
        if block_number > self.nblocks:
            raise ValueError(
                f"'block_number={block_number}' is greater than "
                f"the number of blocks ({self.nblocks})"
            )

        start = block_number * self.blocksize
        end = start + self.blocksize
        self.total_requested_bytes += end - start
        self.miss_count += 1
        logger.info("BlockCache fetching block %d", block_number)
        block_contents = super()._fetch(start, end)
        return block_contents

    def _read_cache(
        self, start: int, end: int, start_block_number: int, end_block_number: int
    ) -> bytes:
        """
        Read from our block cache.

        Parameters
        ----------
        start, end : int
            The start and end byte positions.
        start_block_number, end_block_number : int
            The start and end block numbers.
        """
        start_pos = start % self.blocksize
        end_pos = end % self.blocksize

        self.hit_count += 1
        if start_block_number == end_block_number:
            block: bytes = self._fetch_block_cached(start_block_number)
            return block[start_pos:end_pos]

        else:
            # read from the initial
            out = [self._fetch_block_cached(start_block_number)[start_pos:]]

            # intermediate blocks
            # Note: it'd be nice to combine these into one big request. However
            # that doesn't play nicely with our LRU cache.
            out.extend(
                map(
                    self._fetch_block_cached,
                    range(start_block_number + 1, end_block_number),
                )
            )

            # final block
            out.append(self._fetch_block_cached(end_block_number)[:end_pos])

            return b"".join(out)


class BytesCache(BaseCache):
    """Cache which holds data in an in-memory bytes object

    Implements read-ahead by the block size, for semi-random reads progressing
    through the file.

    Parameters
    ----------
    trim: bool
        As we read more data, whether to discard the start of the buffer when
        we are more than a blocksize ahead of it.
    """

    name: ClassVar[str] = "bytes"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, trim: bool = True
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.cache = b""
        self.start: int | None = None
        self.end: int | None = None
        self.trim = trim

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        # TODO: only set start/end after fetch, in case it fails?
        # is this where retry logic might go?
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        if (
            self.start is not None
            and start >= self.start
            and self.end is not None
            and end < self.end
        ):
            # cache hit: we have all the required data
            offset = start - self.start
            self.hit_count += 1
            return self.cache[offset : offset + end - start]

        if self.blocksize:
            bend = min(self.size, end + self.blocksize)
        else:
            bend = end

        if bend == start or start > self.size:
            return b""

        if (self.start is None or start < self.start) and (
            self.end is None or end > self.end
        ):
            # First read, or extending both before and after
            self.total_requested_bytes += bend - start
            self.miss_count += 1
            self.cache = self.fetcher(start, bend)
            self.start = start
        else:
            assert self.start is not None
            assert self.end is not None
            self.miss_count += 1

            if start < self.start:
                if self.end is None or self.end - end > self.blocksize:
                    self.total_requested_bytes += bend - start
                    self.cache = self.fetcher(start, bend)
                    self.start = start
                else:
                    self.total_requested_bytes += self.start - start
                    new = self.fetcher(start, self.start)
                    self.start = start
                    self.cache = new + self.cache
            elif self.end is not None and bend > self.end:
                if self.end > self.size:
                    pass
                elif end - self.end > self.blocksize:
                    self.total_requested_bytes += bend - start
                    self.cache = self.fetcher(start, bend)
                    self.start = start
                else:
                    self.total_requested_bytes += bend - self.end
                    new = self.fetcher(self.end, bend)
                    self.cache = self.cache + new

        self.end = self.start + len(self.cache)
        offset = start - self.start
        out = self.cache[offset : offset + end - start]
        if self.trim:
            num = (self.end - self.start) // (self.blocksize + 1)
            if num > 1:
                self.start += self.blocksize * num
                self.cache = self.cache[self.blocksize * num :]
        return out

    def __len__(self) -> int:
        return len(self.cache)


class AllBytes(BaseCache):
    """Cache entire contents of the file"""

    name: ClassVar[str] = "all"

    def __init__(
        self,
        blocksize: int | None = None,
        fetcher: Fetcher | None = None,
        size: int | None = None,
        data: bytes | None = None,
    ) -> None:
        super().__init__(blocksize, fetcher, size)  # type: ignore[arg-type]
        if data is None:
            self.miss_count += 1
            self.total_requested_bytes += self.size
            data = self.fetcher(0, self.size)
        self.data = data

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        self.hit_count += 1
        return self.data[start:stop]


class KnownPartsOfAFile(BaseCache):
    """
    Cache holding known file parts.

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: func
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    data: dict
        A dictionary mapping explicit `(start, stop)` file-offset tuples
        with known bytes.
    strict: bool, default True
        Whether to fetch reads that go beyond a known byte-range boundary.
        If `False`, any read that ends outside a known part will be zero
        padded. Note that zero padding will not be used for reads that
        begin outside a known byte-range.
    """

    name: ClassVar[str] = "parts"

    def __init__(
        self,
        blocksize: int,
        fetcher: Fetcher,
        size: int,
        data: Optional[dict[tuple[int, int], bytes]] = None,
        strict: bool = True,
        **_: Any,
    ):
        super().__init__(blocksize, fetcher, size)
        self.strict = strict

        # simple consolidation of contiguous blocks
        if data:
            old_offsets = sorted(data.keys())
            offsets = [old_offsets[0]]
            blocks = [data.pop(old_offsets[0])]
            for start, stop in old_offsets[1:]:
                start0, stop0 = offsets[-1]
                if start == stop0:
                    offsets[-1] = (start0, stop)
                    blocks[-1] += data.pop((start, stop))
                else:
                    offsets.append((start, stop))
                    blocks.append(data.pop((start, stop)))

            self.data = dict(zip(offsets, blocks))
        else:
            self.data = {}

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        if start is None:
            start = 0
        if stop is None:
            stop = self.size

        out = b""
        for (loc0, loc1), data in self.data.items():
            # If self.strict=False, use zero-padded data
            # for reads beyond the end of a "known" buffer
            if loc0 <= start < loc1:
                off = start - loc0
                out = data[off : off + stop - start]
                if not self.strict or loc0 <= stop <= loc1:
                    # The request is within a known range, or
                    # it begins within a known range, and we
                    # are allowed to pad reads beyond the
                    # buffer with zero
                    out += b"\x00" * (stop - start - len(out))
                    self.hit_count += 1
                    return out
                else:
                    # The request ends outside a known range,
                    # and we are being "strict" about reads
                    # beyond the buffer
                    start = loc1
                    break

        # We only get here if there is a request outside the
        # known parts of the file. In an ideal world, this
        # should never happen
        if self.fetcher is None:
            # We cannot fetch the data, so raise an error
            raise ValueError(f"Read is outside the known file parts: {(start, stop)}. ")
        # We can fetch the data, but should warn the user
        # that this may be slow
        warnings.warn(
            f"Read is outside the known file parts: {(start, stop)}. "
            f"IO/caching performance may be poor!"
        )
        logger.debug(f"KnownPartsOfAFile cache fetching {start}-{stop}")
        self.total_requested_bytes += stop - start
        self.miss_count += 1
        return out + super()._fetch(start, stop)
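A sketch (not part of fsspec) of seeding `KnownPartsOfAFile` with byte ranges we already hold, so reads inside them never touch the fetcher; the ranges and payloads are invented for illustration.

```python
from fsspec.caching import KnownPartsOfAFile

known = {(0, 5): b"hello", (5, 11): b" world"}  # contiguous: gets consolidated
kc = KnownPartsOfAFile(blocksize=0, fetcher=None, size=11, data=known)
# the two adjacent ranges were merged in __init__, so this whole read is a hit
assert kc._fetch(0, 11) == b"hello world"
```

With `fetcher=None`, any read straying outside the known parts raises, which is the "ideal world" case the comments above describe.
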
class UpdatableLRU(Generic[P, T]):
    """
    Custom implementation of LRU cache that allows updating keys

    Used by BackgroundBlockCache
    """

    class CacheInfo(NamedTuple):
        hits: int
        misses: int
        maxsize: int
        currsize: int

    def __init__(self, func: Callable[P, T], max_size: int = 128) -> None:
        self._cache: OrderedDict[Any, T] = collections.OrderedDict()
        self._func = func
        self._max_size = max_size
        self._hits = 0
        self._misses = 0
        self._lock = threading.Lock()

    def __call__(self, *args: P.args, **kwargs: P.kwargs) -> T:
        if kwargs:
            raise TypeError(f"Got unexpected keyword argument {kwargs.keys()}")
        with self._lock:
            if args in self._cache:
                self._cache.move_to_end(args)
                self._hits += 1
                return self._cache[args]

        result = self._func(*args, **kwargs)

        with self._lock:
            self._cache[args] = result
            self._misses += 1
            if len(self._cache) > self._max_size:
                self._cache.popitem(last=False)

        return result

    def is_key_cached(self, *args: Any) -> bool:
        with self._lock:
            return args in self._cache

    def add_key(self, result: T, *args: Any) -> None:
        with self._lock:
            self._cache[args] = result
            if len(self._cache) > self._max_size:
                self._cache.popitem(last=False)

    def cache_info(self) -> UpdatableLRU.CacheInfo:
        with self._lock:
            return self.CacheInfo(
                maxsize=self._max_size,
                currsize=len(self._cache),
                hits=self._hits,
                misses=self._misses,
            )


class BackgroundBlockCache(BaseCache):
    """
    Cache holding memory as a set of blocks with pre-loading of
    the next block in the background.

    Requests are only ever made ``blocksize`` at a time, and are
    stored in an LRU cache. The least recently accessed block is
    discarded when more than ``maxblocks`` are stored. If the
    next block is not in cache, it is loaded in a separate thread
    in non-blocking way.

    Parameters
    ----------
    blocksize : int
        The number of bytes to store in each block.
        Requests are only ever made for ``blocksize``, so this
        should balance the overhead of making a request against
        the granularity of the blocks.
    fetcher : Callable
    size : int
        The total size of the file being cached.
    maxblocks : int
        The maximum number of blocks to cache for. The maximum memory
        use for this cache is then ``blocksize * maxblocks``.
    """

    name: ClassVar[str] = "background"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.nblocks = math.ceil(size / blocksize)
        self.maxblocks = maxblocks
        self._fetch_block_cached = UpdatableLRU(self._fetch_block, maxblocks)

        self._thread_executor = ThreadPoolExecutor(max_workers=1)
        self._fetch_future_block_number: int | None = None
        self._fetch_future: Future[bytes] | None = None
        self._fetch_future_lock = threading.Lock()

    def cache_info(self) -> UpdatableLRU.CacheInfo:
        """
        The statistics on the block cache.

        Returns
        -------
        NamedTuple
            Returned directly from the LRU Cache used internally.
        """
        return self._fetch_block_cached.cache_info()

    def __getstate__(self) -> dict[str, Any]:
        state = self.__dict__
        del state["_fetch_block_cached"]
        del state["_thread_executor"]
        del state["_fetch_future_block_number"]
        del state["_fetch_future"]
        del state["_fetch_future_lock"]
        return state

    def __setstate__(self, state) -> None:
        self.__dict__.update(state)
        self._fetch_block_cached = UpdatableLRU(self._fetch_block, state["maxblocks"])
        self._thread_executor = ThreadPoolExecutor(max_workers=1)
        self._fetch_future_block_number = None
        self._fetch_future = None
        self._fetch_future_lock = threading.Lock()

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""

        # byte position -> block numbers
        start_block_number = start // self.blocksize
        end_block_number = end // self.blocksize

        fetch_future_block_number = None
        fetch_future = None
        with self._fetch_future_lock:
            # Background thread is running. Check whether we can or must join it.
            if self._fetch_future is not None:
                assert self._fetch_future_block_number is not None
                if self._fetch_future.done():
                    logger.info("BlockCache joined background fetch without waiting.")
                    self._fetch_block_cached.add_key(
                        self._fetch_future.result(), self._fetch_future_block_number
                    )
                    # Cleanup the fetch variables. Done with fetching the block.
                    self._fetch_future_block_number = None
                    self._fetch_future = None
                else:
                    # Must join if we need the block for the current fetch
                    must_join = bool(
                        start_block_number
                        <= self._fetch_future_block_number
                        <= end_block_number
                    )
                    if must_join:
                        # Copy to the local variables to release lock
                        # before waiting for result
                        fetch_future_block_number = self._fetch_future_block_number
                        fetch_future = self._fetch_future

                        # Cleanup the fetch variables. Have a local copy.
                        self._fetch_future_block_number = None
                        self._fetch_future = None

        # Need to wait for the future for the current read
        if fetch_future is not None:
            logger.info("BlockCache waiting for background fetch.")
            # Wait until result and put it in cache
            self._fetch_block_cached.add_key(
                fetch_future.result(), fetch_future_block_number
            )

        # these are cached, so safe to do multiple calls for the same start and end.
        for block_number in range(start_block_number, end_block_number + 1):
            self._fetch_block_cached(block_number)

        # fetch next block in the background if nothing is running in the background,
        # the block is within file and it is not already cached
        end_block_plus_1 = end_block_number + 1
        with self._fetch_future_lock:
            if (
                self._fetch_future is None
                and end_block_plus_1 <= self.nblocks
                and not self._fetch_block_cached.is_key_cached(end_block_plus_1)
            ):
                self._fetch_future_block_number = end_block_plus_1
                self._fetch_future = self._thread_executor.submit(
                    self._fetch_block, end_block_plus_1, "async"
                )

        return self._read_cache(
            start,
            end,
            start_block_number=start_block_number,
            end_block_number=end_block_number,
        )

    def _fetch_block(self, block_number: int, log_info: str = "sync") -> bytes:
        """
        Fetch the block of data for `block_number`.
        """
        if block_number > self.nblocks:
            raise ValueError(
                f"'block_number={block_number}' is greater than "
                f"the number of blocks ({self.nblocks})"
            )

        start = block_number * self.blocksize
        end = start + self.blocksize
        logger.info("BlockCache fetching block (%s) %d", log_info, block_number)
        self.total_requested_bytes += end - start
        self.miss_count += 1
        block_contents = super()._fetch(start, end)
        return block_contents

    def _read_cache(
        self, start: int, end: int, start_block_number: int, end_block_number: int
    ) -> bytes:
        """
        Read from our block cache.

        Parameters
        ----------
        start, end : int
            The start and end byte positions.
        start_block_number, end_block_number : int
            The start and end block numbers.
        """
        start_pos = start % self.blocksize
        end_pos = end % self.blocksize

        # kind of pointless to count this as a hit, but it is
        self.hit_count += 1

        if start_block_number == end_block_number:
            block = self._fetch_block_cached(start_block_number)
            return block[start_pos:end_pos]

        else:
            # read from the initial
            out = [self._fetch_block_cached(start_block_number)[start_pos:]]

            # intermediate blocks
            # Note: it'd be nice to combine these into one big request. However
            # that doesn't play nicely with our LRU cache.
            out.extend(
                map(
                    self._fetch_block_cached,
                    range(start_block_number + 1, end_block_number),
                )
            )

            # final block
            out.append(self._fetch_block_cached(end_block_number)[:end_pos])

            return b"".join(out)


caches: dict[str | None, type[BaseCache]] = {
    # one custom case
    None: BaseCache,
}


def register_cache(cls: type[BaseCache], clobber: bool = False) -> None:
    """'Register' cache implementation.

    Parameters
    ----------
    clobber: bool, optional
        If set to True (default is False) - allow to overwrite existing
        entry.

    Raises
    ------
    ValueError
    """
    name = cls.name
    if not clobber and name in caches:
        raise ValueError(f"Cache with name {name!r} is already known: {caches[name]}")
    caches[name] = cls


for c in (
    BaseCache,
    MMapCache,
    BytesCache,
    ReadAheadCache,
    BlockCache,
    FirstChunkCache,
    AllBytes,
    KnownPartsOfAFile,
    BackgroundBlockCache,
):
    register_cache(c)
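A sketch (not part of fsspec) of extending the `caches` registry above with a custom implementation; `LoggingCache` is an invented name, and the `name` attribute is the key under which buffered files can later look the class up (e.g. via a `cache_type=` argument).

```python
from fsspec.caching import BaseCache, caches, register_cache


class LoggingCache(BaseCache):  # hypothetical example cache
    name = "logging"

    def _fetch(self, start, stop):
        # report every range request before delegating to the pass-through base
        print(f"fetch {start}-{stop}")
        return super()._fetch(start, stop)


register_cache(LoggingCache)
assert caches["logging"] is LoggingCache
```

Registering the same name twice raises ValueError unless `clobber=True`, mirroring the loop at the end of the module.
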
venv/lib/python3.12/site-packages/fsspec/callbacks.py
ADDED
@@ -0,0 +1,324 @@
from functools import wraps


class Callback:
    """
    Base class and interface for callback mechanism

    This class can be used directly for monitoring file transfers by
    providing ``callback=Callback(hooks=...)`` (see the ``hooks`` argument,
    below), or subclassed for more specialised behaviour.

    Parameters
    ----------
    size: int (optional)
        Nominal quantity for the value that corresponds to a complete
        transfer, e.g., total number of tiles or total number of
        bytes
    value: int (0)
        Starting internal counter value
    hooks: dict or None
        A dict of named functions to be called on each update. The signature
        of these must be ``f(size, value, **kwargs)``
    """

    def __init__(self, size=None, value=0, hooks=None, **kwargs):
        self.size = size
        self.value = value
        self.hooks = hooks or {}
        self.kw = kwargs

    def __enter__(self):
        return self

    def __exit__(self, *exc_args):
        self.close()

    def close(self):
        """Close callback."""

    def branched(self, path_1, path_2, **kwargs):
        """
        Return callback for child transfers

        If this callback is operating at a higher level, e.g., put, which may
        trigger transfers that can also be monitored. The function returns a callback
        that has to be passed to the child method, e.g., put_file,
        as `callback=` argument.

        The implementation uses `callback.branch` for compatibility.
        When implementing callbacks, it is recommended to override this function instead
        of `branch` and avoid calling `super().branched(...)`.

        Prefer using this function over `branch`.

        Parameters
        ----------
        path_1: str
            Child's source path
        path_2: str
            Child's destination path
        **kwargs:
            Arbitrary keyword arguments

        Returns
        -------
        callback: Callback
            A callback instance to be passed to the child method
        """
        self.branch(path_1, path_2, kwargs)
        # mutate kwargs so that we can force the caller to pass "callback=" explicitly
        return kwargs.pop("callback", DEFAULT_CALLBACK)

    def branch_coro(self, fn):
        """
        Wraps a coroutine, and passes a new child callback to it.
        """

        @wraps(fn)
        async def func(path1, path2: str, **kwargs):
            with self.branched(path1, path2, **kwargs) as child:
                return await fn(path1, path2, callback=child, **kwargs)

        return func

    def set_size(self, size):
        """
        Set the internal maximum size attribute

        Usually called if not initially set at instantiation. Note that this
        triggers a ``call()``.

        Parameters
        ----------
        size: int
        """
        self.size = size
        self.call()

    def absolute_update(self, value):
        """
        Set the internal value state

        Triggers ``call()``

        Parameters
        ----------
        value: int
        """
        self.value = value
        self.call()

    def relative_update(self, inc=1):
        """
        Delta increment the internal counter

        Triggers ``call()``

        Parameters
        ----------
        inc: int
        """
        self.value += inc
        self.call()

    def call(self, hook_name=None, **kwargs):
        """
        Execute hook(s) with current state

        Each function is passed the internal size and current value

        Parameters
        ----------
        hook_name: str or None
            If given, execute on this hook
        kwargs: passed on to (all) hook(s)
        """
        if not self.hooks:
            return
        kw = self.kw.copy()
        kw.update(kwargs)
        if hook_name:
            if hook_name not in self.hooks:
                return
            return self.hooks[hook_name](self.size, self.value, **kw)
        for hook in self.hooks.values() or []:
            hook(self.size, self.value, **kw)

    def wrap(self, iterable):
        """
        Wrap an iterable to call ``relative_update`` on each iteration

        Parameters
        ----------
        iterable: Iterable
            The iterable that is being wrapped
        """
        for item in iterable:
            self.relative_update()
            yield item

    def branch(self, path_1, path_2, kwargs):
        """
        Set callbacks for child transfers

        If this callback is operating at a higher level, e.g., put, which may
        trigger transfers that can also be monitored. The passed kwargs are
        to be *mutated* to add ``callback=``, if this class supports branching
        to children.

        Parameters
        ----------
        path_1: str
            Child's source path
        path_2: str
            Child's destination path
        kwargs: dict
            arguments passed to child method, e.g., put_file.

        Returns
        -------

        """
        return None

    def no_op(self, *_, **__):
        pass

    def __getattr__(self, item):
        """
        If undefined methods are called on this class, nothing happens
        """
        return self.no_op

    @classmethod
    def as_callback(cls, maybe_callback=None):
        """Transform callback=... into Callback instance

        For the special value of ``None``, return the global instance of
        ``NoOpCallback``. This is an alternative to including
        ``callback=DEFAULT_CALLBACK`` directly in a method signature.
        """
        if maybe_callback is None:
            return DEFAULT_CALLBACK
        return maybe_callback
class NoOpCallback(Callback):
|
208 |
+
"""
|
209 |
+
This implementation of Callback does exactly nothing
|
210 |
+
"""
|
211 |
+
|
212 |
+
def call(self, *args, **kwargs):
|
213 |
+
return None
|
214 |
+
|
215 |
+
|
216 |
+
class DotPrinterCallback(Callback):
|
217 |
+
"""
|
218 |
+
Simple example Callback implementation
|
219 |
+
|
220 |
+
Almost identical to Callback with a hook that prints a char; here we
|
221 |
+
demonstrate how the outer layer may print "#" and the inner layer "."
|
222 |
+
"""
|
223 |
+
|
224 |
+
def __init__(self, chr_to_print="#", **kwargs):
|
225 |
+
self.chr = chr_to_print
|
226 |
+
super().__init__(**kwargs)
|
227 |
+
|
228 |
+
def branch(self, path_1, path_2, kwargs):
|
229 |
+
"""Mutate kwargs to add new instance with different print char"""
|
230 |
+
kwargs["callback"] = DotPrinterCallback(".")
|
231 |
+
|
232 |
+
def call(self, **kwargs):
|
233 |
+
"""Just outputs a character"""
|
234 |
+
print(self.chr, end="")
|
235 |
+
|
236 |
+
|
237 |
+
class TqdmCallback(Callback):
|
238 |
+
"""
|
239 |
+
A callback to display a progress bar using tqdm
|
240 |
+
|
241 |
+
Parameters
|
242 |
+
----------
|
243 |
+
tqdm_kwargs : dict, (optional)
|
244 |
+
Any argument accepted by the tqdm constructor.
|
245 |
+
See the `tqdm doc <https://tqdm.github.io/docs/tqdm/#__init__>`_.
|
246 |
+
Will be forwarded to `tqdm_cls`.
|
247 |
+
tqdm_cls: (optional)
|
248 |
+
subclass of `tqdm.tqdm`. If not passed, it will default to `tqdm.tqdm`.
|
249 |
+
|
250 |
+
Examples
|
251 |
+
--------
|
252 |
+
>>> import fsspec
|
253 |
+
>>> from fsspec.callbacks import TqdmCallback
|
254 |
+
>>> fs = fsspec.filesystem("memory")
|
255 |
+
>>> path2distant_data = "/your-path"
|
256 |
+
>>> fs.upload(
|
257 |
+
".",
|
258 |
+
path2distant_data,
|
259 |
+
recursive=True,
|
260 |
+
callback=TqdmCallback(),
|
261 |
+
)
|
262 |
+
|
263 |
+
You can forward args to tqdm using the ``tqdm_kwargs`` parameter.
|
264 |
+
|
265 |
+
>>> fs.upload(
|
266 |
+
".",
|
267 |
+
path2distant_data,
|
268 |
+
recursive=True,
|
269 |
+
callback=TqdmCallback(tqdm_kwargs={"desc": "Your tqdm description"}),
|
270 |
+
)
|
271 |
+
|
272 |
+
You can also customize the progress bar by passing a subclass of `tqdm`.
|
273 |
+
|
274 |
+
.. code-block:: python
|
275 |
+
|
276 |
+
class TqdmFormat(tqdm):
|
277 |
+
'''Provides a `total_time` format parameter'''
|
278 |
+
@property
|
279 |
+
def format_dict(self):
|
280 |
+
d = super().format_dict
|
281 |
+
total_time = d["elapsed"] * (d["total"] or 0) / max(d["n"], 1)
|
282 |
+
d.update(total_time=self.format_interval(total_time) + " in total")
|
283 |
+
return d
|
284 |
+
|
285 |
+
>>> with TqdmCallback(
|
286 |
+
tqdm_kwargs={
|
287 |
+
"desc": "desc",
|
288 |
+
"bar_format": "{total_time}: {percentage:.0f}%|{bar}{r_bar}",
|
289 |
+
},
|
290 |
+
tqdm_cls=TqdmFormat,
|
291 |
+
) as callback:
|
292 |
+
fs.upload(".", path2distant_data, recursive=True, callback=callback)
|
293 |
+
"""
|
294 |
+
|
295 |
+
def __init__(self, tqdm_kwargs=None, *args, **kwargs):
|
296 |
+
try:
|
297 |
+
from tqdm import tqdm
|
298 |
+
|
299 |
+
except ImportError as exce:
|
300 |
+
raise ImportError(
|
301 |
+
"Using TqdmCallback requires tqdm to be installed"
|
302 |
+
) from exce
|
303 |
+
|
304 |
+
self._tqdm_cls = kwargs.pop("tqdm_cls", tqdm)
|
305 |
+
self._tqdm_kwargs = tqdm_kwargs or {}
|
306 |
+
self.tqdm = None
|
307 |
+
super().__init__(*args, **kwargs)
|
308 |
+
|
309 |
+
def call(self, *args, **kwargs):
|
310 |
+
if self.tqdm is None:
|
311 |
+
self.tqdm = self._tqdm_cls(total=self.size, **self._tqdm_kwargs)
|
312 |
+
self.tqdm.total = self.size
|
313 |
+
self.tqdm.update(self.value - self.tqdm.n)
|
314 |
+
|
315 |
+
def close(self):
|
316 |
+
if self.tqdm is not None:
|
317 |
+
self.tqdm.close()
|
318 |
+
self.tqdm = None
|
319 |
+
|
320 |
+
def __del__(self):
|
321 |
+
return self.close()
|
322 |
+
|
323 |
+
|
324 |
+
DEFAULT_CALLBACK = _DEFAULT_CALLBACK = NoOpCallback()
|
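
The hook mechanism above is easiest to see end-to-end with a hand-rolled callback. A minimal sketch, assuming the ``Callback`` constructor accepts ``size=`` and ``hooks=`` as in the class defined in this file; the hook name ``report`` and the item list are invented for illustration:

    from fsspec.callbacks import Callback

    def report(size, value):
        # every hook receives (size, value) plus any stored kwargs
        print(f"{value}/{size}")

    cb = Callback(size=3, hooks={"report": report})
    for item in cb.wrap(["a", "b", "c"]):  # wrap() fires relative_update() per item
        pass  # prints 1/3, 2/3, 3/3
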
venv/lib/python3.12/site-packages/fsspec/compression.py
ADDED
@@ -0,0 +1,175 @@
"""Helper functions for a standard streaming compression API"""

from zipfile import ZipFile

import fsspec.utils
from fsspec.spec import AbstractBufferedFile


def noop_file(file, mode, **kwargs):
    return file


# TODO: files should also be available as contexts
# should be functions of the form func(infile, mode=, **kwargs) -> file-like
compr = {None: noop_file}


def register_compression(name, callback, extensions, force=False):
    """Register an "inferable" file compression type.

    Registers transparent file compression type for use with fsspec.open.
    Compression can be specified by name in open, or "infer"-ed for any files
    ending with the given extensions.

    Args:
        name: (str) The compression type name. Eg. "gzip".
        callback: A callable of form (infile, mode, **kwargs) -> file-like.
            Accepts an input file-like object, the target mode and kwargs.
            Returns a wrapped file-like object.
        extensions: (str, Iterable[str]) A file extension, or list of file
            extensions for which to infer this compression scheme. Eg. "gz".
        force: (bool) Force re-registration of compression type or extensions.

    Raises:
        ValueError: If name or extensions already registered, and not force.

    """
    if isinstance(extensions, str):
        extensions = [extensions]

    # Validate registration
    if name in compr and not force:
        raise ValueError(f"Duplicate compression registration: {name}")

    for ext in extensions:
        if ext in fsspec.utils.compressions and not force:
            raise ValueError(f"Duplicate compression file extension: {ext} ({name})")

    compr[name] = callback

    for ext in extensions:
        fsspec.utils.compressions[ext] = name


def unzip(infile, mode="rb", filename=None, **kwargs):
    if "r" not in mode:
        filename = filename or "file"
        z = ZipFile(infile, mode="w", **kwargs)
        fo = z.open(filename, mode="w")
        fo.close = lambda closer=fo.close: closer() or z.close()
        return fo
    z = ZipFile(infile)
    if filename is None:
        filename = z.namelist()[0]
    return z.open(filename, mode="r", **kwargs)


register_compression("zip", unzip, "zip")

try:
    from bz2 import BZ2File
except ImportError:
    pass
else:
    register_compression("bz2", BZ2File, "bz2")

try:  # pragma: no cover
    from isal import igzip

    def isal(infile, mode="rb", **kwargs):
        return igzip.IGzipFile(fileobj=infile, mode=mode, **kwargs)

    register_compression("gzip", isal, "gz")
except ImportError:
    from gzip import GzipFile

    register_compression(
        "gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), "gz"
    )

try:
    from lzma import LZMAFile

    register_compression("lzma", LZMAFile, "lzma")
    register_compression("xz", LZMAFile, "xz")
except ImportError:
    pass

try:
    import lzmaffi

    register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True)
    register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
except ImportError:
    pass


class SnappyFile(AbstractBufferedFile):
    def __init__(self, infile, mode, **kwargs):
        import snappy

        super().__init__(
            fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs
        )
        self.infile = infile
        if "r" in mode:
            self.codec = snappy.StreamDecompressor()
        else:
            self.codec = snappy.StreamCompressor()

    def _upload_chunk(self, final=False):
        self.buffer.seek(0)
        out = self.codec.add_chunk(self.buffer.read())
        self.infile.write(out)
        return True

    def seek(self, loc, whence=0):
        raise NotImplementedError("SnappyFile is not seekable")

    def seekable(self):
        return False

    def _fetch_range(self, start, end):
        """Get the specified set of bytes from remote"""
        data = self.infile.read(end - start)
        return self.codec.decompress(data)


try:
    import snappy

    snappy.compress(b"")
    # Snappy may use the .sz file extension, but this is not part of the
    # standard implementation.
    register_compression("snappy", SnappyFile, [])

except (ImportError, NameError, AttributeError):
    pass

try:
    import lz4.frame

    register_compression("lz4", lz4.frame.open, "lz4")
except ImportError:
    pass

try:
    import zstandard as zstd

    def zstandard_file(infile, mode="rb"):
        if "r" in mode:
            cctx = zstd.ZstdDecompressor()
            return cctx.stream_reader(infile)
        else:
            cctx = zstd.ZstdCompressor(level=10)
            return cctx.stream_writer(infile)

    register_compression("zstd", zstandard_file, "zst")
except ImportError:
    pass


def available_compressions():
    """Return a list of the implemented compressions."""
    return list(compr)
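
As a usage illustration of ``register_compression``, a sketch that registers a pass-through codec; both the ``identity`` name and the ``.raw`` extension are invented for the example:

    import fsspec
    from fsspec.compression import register_compression

    def identity(infile, mode="rb", **kwargs):
        # a real codec would return a wrapping file-like; this one does nothing
        return infile

    register_compression("identity", identity, "raw")

    # compression="infer" now maps the .raw extension to the new codec
    with fsspec.open("memory://data.raw", "wb", compression="infer") as f:
        f.write(b"hello")
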
venv/lib/python3.12/site-packages/fsspec/config.py
ADDED
@@ -0,0 +1,131 @@
from __future__ import annotations

import configparser
import json
import os
import warnings
from typing import Any

conf: dict[str, dict[str, Any]] = {}
default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/fsspec")
conf_dir = os.environ.get("FSSPEC_CONFIG_DIR", default_conf_dir)


def set_conf_env(conf_dict, envdict=os.environ):
    """Set config values from environment variables

    Looks for variables of the form ``FSSPEC_<protocol>`` and
    ``FSSPEC_<protocol>_<kwarg>``. For ``FSSPEC_<protocol>`` the value is parsed
    as a json dictionary and used to ``update`` the config of the
    corresponding protocol. For ``FSSPEC_<protocol>_<kwarg>`` there is no
    attempt to convert the string value, but the kwarg keys will be lower-cased.

    The ``FSSPEC_<protocol>_<kwarg>`` variables are applied after the
    ``FSSPEC_<protocol>`` ones.

    Parameters
    ----------
    conf_dict : dict(str, dict)
        This dict will be mutated
    envdict : dict-like(str, str)
        Source for the values - usually the real environment
    """
    kwarg_keys = []
    for key in envdict:
        if key.startswith("FSSPEC_") and len(key) > 7 and key[7] != "_":
            if key.count("_") > 1:
                kwarg_keys.append(key)
                continue
            try:
                value = json.loads(envdict[key])
            except json.decoder.JSONDecodeError as ex:
                warnings.warn(
                    f"Ignoring environment variable {key} due to a parse failure: {ex}"
                )
            else:
                if isinstance(value, dict):
                    _, proto = key.split("_", 1)
                    conf_dict.setdefault(proto.lower(), {}).update(value)
                else:
                    warnings.warn(
                        f"Ignoring environment variable {key} due to not being a dict:"
                        f" {type(value)}"
                    )
        elif key.startswith("FSSPEC"):
            warnings.warn(
                f"Ignoring environment variable {key} due to having an unexpected name"
            )

    for key in kwarg_keys:
        _, proto, kwarg = key.split("_", 2)
        conf_dict.setdefault(proto.lower(), {})[kwarg.lower()] = envdict[key]


def set_conf_files(cdir, conf_dict):
    """Set config values from files

    Scans for INI and JSON files in the given directory, and uses their
    contents to set the config. In case of repeated values, later values
    win.

    In the case of INI files, all values are strings, and these will not
    be converted.

    Parameters
    ----------
    cdir : str
        Directory to search
    conf_dict : dict(str, dict)
        This dict will be mutated
    """
    if not os.path.isdir(cdir):
        return
    allfiles = sorted(os.listdir(cdir))
    for fn in allfiles:
        if fn.endswith(".ini"):
            ini = configparser.ConfigParser()
            ini.read(os.path.join(cdir, fn))
            for key in ini:
                if key == "DEFAULT":
                    continue
                conf_dict.setdefault(key, {}).update(dict(ini[key]))
        if fn.endswith(".json"):
            with open(os.path.join(cdir, fn)) as f:
                js = json.load(f)
            for key in js:
                conf_dict.setdefault(key, {}).update(dict(js[key]))


def apply_config(cls, kwargs, conf_dict=None):
    """Supply default values for kwargs when instantiating class

    Augments the passed kwargs, by finding entries in the config dict
    which match the class's ``.protocol`` attribute (one or more str)

    Parameters
    ----------
    cls : file system implementation
    kwargs : dict
    conf_dict : dict of dict
        Typically this is the global configuration

    Returns
    -------
    dict : the modified set of kwargs
    """
    if conf_dict is None:
        conf_dict = conf
    protos = cls.protocol if isinstance(cls.protocol, (tuple, list)) else [cls.protocol]
    kw = {}
    for proto in protos:
        # default kwargs from the current state of the config
        if proto in conf_dict:
            kw.update(conf_dict[proto])
    # explicit kwargs always win
    kw.update(**kwargs)
    kwargs = kw
    return kwargs


set_conf_files(conf_dir, conf)
set_conf_env(conf)
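
To make the precedence rules concrete, a sketch driving ``set_conf_env`` with a fake environment dict; the ``webhdfs`` protocol and values here are invented:

    from fsspec.config import set_conf_env

    conf = {}
    env = {
        "FSSPEC_WEBHDFS": '{"port": 80}',    # JSON dict form, parsed
        "FSSPEC_WEBHDFS_USER": "alice",      # kwarg form, kept as a string, applied last
    }
    set_conf_env(conf, envdict=env)
    print(conf)  # {'webhdfs': {'port': 80, 'user': 'alice'}}
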
venv/lib/python3.12/site-packages/fsspec/conftest.py
ADDED
@@ -0,0 +1,55 @@
import os
import shutil
import subprocess
import sys
import time

import pytest

import fsspec
from fsspec.implementations.cached import CachingFileSystem


@pytest.fixture()
def m():
    """
    Fixture providing a memory filesystem.
    """
    m = fsspec.filesystem("memory")
    m.store.clear()
    m.pseudo_dirs.clear()
    m.pseudo_dirs.append("")
    try:
        yield m
    finally:
        m.store.clear()
        m.pseudo_dirs.clear()
        m.pseudo_dirs.append("")


@pytest.fixture
def ftp_writable(tmpdir):
    """
    Fixture providing a writable FTP filesystem.
    """
    pytest.importorskip("pyftpdlib")
    from fsspec.implementations.ftp import FTPFileSystem

    FTPFileSystem.clear_instance_cache()  # remove lingering connections
    CachingFileSystem.clear_instance_cache()
    d = str(tmpdir)
    with open(os.path.join(d, "out"), "wb") as f:
        f.write(b"hello" * 10000)
    P = subprocess.Popen(
        [sys.executable, "-m", "pyftpdlib", "-d", d, "-u", "user", "-P", "pass", "-w"]
    )
    try:
        time.sleep(1)
        yield "localhost", 2121, "user", "pass"
    finally:
        P.terminate()
        P.wait()
        try:
            shutil.rmtree(tmpdir)
        except Exception:
            pass
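
A sketch of a test consuming the ``m`` fixture above; the test body is illustrative, not part of the suite:

    def test_roundtrip(m):
        with m.open("/hello.txt", "wb") as f:
            f.write(b"hi")
        assert m.cat("/hello.txt") == b"hi"
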
venv/lib/python3.12/site-packages/fsspec/core.py
ADDED
@@ -0,0 +1,743 @@
from __future__ import annotations

import io
import logging
import os
import re
from glob import has_magic
from pathlib import Path

# for backwards compat, we export cache things from here too
from fsspec.caching import (  # noqa: F401
    BaseCache,
    BlockCache,
    BytesCache,
    MMapCache,
    ReadAheadCache,
    caches,
)
from fsspec.compression import compr
from fsspec.config import conf
from fsspec.registry import filesystem, get_filesystem_class
from fsspec.utils import (
    _unstrip_protocol,
    build_name_function,
    infer_compression,
    stringify_path,
)

logger = logging.getLogger("fsspec")


class OpenFile:
    """
    File-like object to be used in a context

    Can layer (buffered) text-mode and compression over any file-system, which
    are typically binary-only.

    These instances are safe to serialize, as the low-level file object
    is not created until invoked using ``with``.

    Parameters
    ----------
    fs: FileSystem
        The file system to use for opening the file. Should be a subclass or duck-type
        with ``fsspec.spec.AbstractFileSystem``
    path: str
        Location to open
    mode: str like 'rb', optional
        Mode of the opened file
    compression: str or None, optional
        Compression to apply
    encoding: str or None, optional
        The encoding to use if opened in text mode.
    errors: str or None, optional
        How to handle encoding errors if opened in text mode.
    newline: None or str
        Passed to TextIOWrapper in text mode, how to handle line endings.
    autoopen: bool
        If True, calls open() immediately. Mostly used by pickle
    pos: int
        If given and autoopen is True, seek to this location immediately
    """

    def __init__(
        self,
        fs,
        path,
        mode="rb",
        compression=None,
        encoding=None,
        errors=None,
        newline=None,
    ):
        self.fs = fs
        self.path = path
        self.mode = mode
        self.compression = get_compression(path, compression)
        self.encoding = encoding
        self.errors = errors
        self.newline = newline
        self.fobjects = []

    def __reduce__(self):
        return (
            OpenFile,
            (
                self.fs,
                self.path,
                self.mode,
                self.compression,
                self.encoding,
                self.errors,
                self.newline,
            ),
        )

    def __repr__(self):
        return f"<OpenFile '{self.path}'>"

    def __enter__(self):
        mode = self.mode.replace("t", "").replace("b", "") + "b"

        try:
            f = self.fs.open(self.path, mode=mode)
        except FileNotFoundError as e:
            if has_magic(self.path):
                raise FileNotFoundError(
                    "%s not found. The URL contains glob characters: you maybe needed\n"
                    "to pass expand=True in fsspec.open() or the storage_options of \n"
                    "your library. You can also set the config value 'open_expand'\n"
                    "before import, or fsspec.core.DEFAULT_EXPAND at runtime, to True.",
                    self.path,
                ) from e
            raise

        self.fobjects = [f]

        if self.compression is not None:
            compress = compr[self.compression]
            f = compress(f, mode=mode[0])
            self.fobjects.append(f)

        if "b" not in self.mode:
            # assume, for example, that 'r' is equivalent to 'rt' as in builtin
            f = PickleableTextIOWrapper(
                f, encoding=self.encoding, errors=self.errors, newline=self.newline
            )
            self.fobjects.append(f)

        return self.fobjects[-1]

    def __exit__(self, *args):
        self.close()

    @property
    def full_name(self):
        return _unstrip_protocol(self.path, self.fs)

    def open(self):
        """Materialise this as a real open file without context

        The OpenFile object should be explicitly closed to avoid enclosed file
        instances persisting. You must, therefore, keep a reference to the OpenFile
        during the life of the file-like it generates.
        """
        return self.__enter__()

    def close(self):
        """Close all encapsulated file objects"""
        for f in reversed(self.fobjects):
            if "r" not in self.mode and not f.closed:
                f.flush()
            f.close()
        self.fobjects.clear()


class OpenFiles(list):
    """List of OpenFile instances

    Can be used in a single context, which opens and closes all of the
    contained files. Normal list access to get the elements works as
    normal.

    A special case is made for caching filesystems - the files will
    be down/uploaded together at the start or end of the context, and
    this may happen concurrently, if the target filesystem supports it.
    """

    def __init__(self, *args, mode="rb", fs=None):
        self.mode = mode
        self.fs = fs
        self.files = []
        super().__init__(*args)

    def __enter__(self):
        if self.fs is None:
            raise ValueError("Context has already been used")

        fs = self.fs
        while True:
            if hasattr(fs, "open_many"):
                # check for concurrent cache download; or set up for upload
                self.files = fs.open_many(self)
                return self.files
            if hasattr(fs, "fs") and fs.fs is not None:
                fs = fs.fs
            else:
                break
        return [s.__enter__() for s in self]

    def __exit__(self, *args):
        fs = self.fs
        [s.__exit__(*args) for s in self]
        if "r" not in self.mode:
            while True:
                if hasattr(fs, "open_many"):
                    # check for concurrent cache upload
                    fs.commit_many(self.files)
                    return
                if hasattr(fs, "fs") and fs.fs is not None:
                    fs = fs.fs
                else:
                    break

    def __getitem__(self, item):
        out = super().__getitem__(item)
        if isinstance(item, slice):
            return OpenFiles(out, mode=self.mode, fs=self.fs)
        return out

    def __repr__(self):
        return f"<List of {len(self)} OpenFile instances>"


def open_files(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    name_function=None,
    num=1,
    protocol=None,
    newline=None,
    auto_mkdir=True,
    expand=True,
    **kwargs,
):
    """Given a path or paths, return a list of ``OpenFile`` objects.

    For writing, a str path must contain the "*" character, which will be filled
    in by increasing numbers, e.g., "part*" -> "part1", "part2" if num=2.

    For either reading or writing, can instead provide explicit list of paths.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
        to read from alternative filesystems. To read from multiple files you
        can pass a globstring or a list of paths, with the caveat that they
        must all have the same protocol.
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    name_function: function or None
        if opening a set of files for writing, those files do not yet exist,
        so we need to generate their names by formatting the urlpath for
        each sequence number
    num: int [1]
        if writing mode, number of files we expect to create (passed to
        name_function)
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    auto_mkdir: bool (True)
        If in write mode, this will ensure the target directory exists before
        writing, by calling ``fs.mkdirs(exist_ok=True)``.
    expand: bool
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> files = open_files('2015-*-*.csv')  # doctest: +SKIP
    >>> files = open_files(
    ...     's3://bucket/2015-*-*.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP

    Returns
    -------
    An ``OpenFiles`` instance, which is a list of ``OpenFile`` objects that can
    be used as a single context

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    fs, fs_token, paths = get_fs_token_paths(
        urlpath,
        mode,
        num=num,
        name_function=name_function,
        storage_options=kwargs,
        protocol=protocol,
        expand=expand,
    )
    if fs.protocol == "file":
        fs.auto_mkdir = auto_mkdir
    elif "r" not in mode and auto_mkdir:
        parents = {fs._parent(path) for path in paths}
        for parent in parents:
            try:
                fs.makedirs(parent, exist_ok=True)
            except PermissionError:
                pass
    return OpenFiles(
        [
            OpenFile(
                fs,
                path,
                mode=mode,
                compression=compression,
                encoding=encoding,
                errors=errors,
                newline=newline,
            )
            for path in paths
        ],
        mode=mode,
        fs=fs,
    )


def _un_chain(path, kwargs):
    # Avoid a circular import
    from fsspec.implementations.cached import CachingFileSystem

    if "::" in path:
        x = re.compile(".*[^a-z]+.*")  # test for non protocol-like single word
        bits = []
        for p in path.split("::"):
            if "://" in p or x.match(p):
                bits.append(p)
            else:
                bits.append(p + "://")
    else:
        bits = [path]
    # [[url, protocol, kwargs], ...]
    out = []
    previous_bit = None
    kwargs = kwargs.copy()
    for bit in reversed(bits):
        protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file"
        cls = get_filesystem_class(protocol)
        extra_kwargs = cls._get_kwargs_from_urls(bit)
        kws = kwargs.pop(protocol, {})
        if bit is bits[0]:
            kws.update(kwargs)
        kw = dict(
            **{k: v for k, v in extra_kwargs.items() if k not in kws or v != kws[k]},
            **kws,
        )
        bit = cls._strip_protocol(bit)
        if "target_protocol" not in kw and issubclass(cls, CachingFileSystem):
            bit = previous_bit
        out.append((bit, protocol, kw))
        previous_bit = bit
    out.reverse()
    return out


def url_to_fs(url, **kwargs):
    """
    Turn fully-qualified and potentially chained URL into filesystem instance

    Parameters
    ----------
    url : str
        The fsspec-compatible URL
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Returns
    -------
    filesystem : FileSystem
        The new filesystem discovered from ``url`` and created with
        ``**kwargs``.
    urlpath : str
        The file-systems-specific URL for ``url``.
    """
    url = stringify_path(url)
    # non-FS arguments that appear in fsspec.open()
    # inspect could keep this in sync with open()'s signature
    known_kwargs = {
        "compression",
        "encoding",
        "errors",
        "expand",
        "mode",
        "name_function",
        "newline",
        "num",
    }
    kwargs = {k: v for k, v in kwargs.items() if k not in known_kwargs}
    chain = _un_chain(url, kwargs)
    inkwargs = {}
    # Reverse iterate the chain, creating a nested target_* structure
    for i, ch in enumerate(reversed(chain)):
        urls, protocol, kw = ch
        if i == len(chain) - 1:
            inkwargs = dict(**kw, **inkwargs)
            continue
        inkwargs["target_options"] = dict(**kw, **inkwargs)
        inkwargs["target_protocol"] = protocol
        inkwargs["fo"] = urls
    urlpath, protocol, _ = chain[0]
    fs = filesystem(protocol, **inkwargs)
    return fs, urlpath


DEFAULT_EXPAND = conf.get("open_expand", False)


def open(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    protocol=None,
    newline=None,
    expand=None,
    **kwargs,
):
    """Given a path or paths, return one ``OpenFile`` object.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath. Prefix with a protocol like ``s3://``
        to read from alternative filesystems. Should not include glob
        character(s).
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    expand: bool or None
        Whether to regard file paths containing special glob characters as needing
        expansion (finding the first match) or absolute. Setting False allows using
        paths which do embed such characters. If None (default), this argument
        takes its value from the DEFAULT_EXPAND module variable, which takes
        its initial value from the "open_expand" config value at startup, which will
        be False if not set.
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> openfile = open('2015-01-01.csv')  # doctest: +SKIP
    >>> openfile = open(
    ...     's3://bucket/2015-01-01.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP
    >>> with openfile as f:
    ...     df = pd.read_csv(f)  # doctest: +SKIP
    ...

    Returns
    -------
    ``OpenFile`` object.

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    expand = DEFAULT_EXPAND if expand is None else expand
    out = open_files(
        urlpath=[urlpath],
        mode=mode,
        compression=compression,
        encoding=encoding,
        errors=errors,
        protocol=protocol,
        newline=newline,
        expand=expand,
        **kwargs,
    )
    if not out:
        raise FileNotFoundError(urlpath)
    return out[0]


def open_local(
    url: str | list[str] | Path | list[Path],
    mode: str = "rb",
    **storage_options: dict,
) -> str | list[str]:
    """Open file(s) which can be resolved to local

    For files which either are local, or get downloaded upon open
    (e.g., by file caching)

    Parameters
    ----------
    url: str or list(str)
    mode: str
        Must be read mode
    storage_options:
        passed on to FS for or used by open_files (e.g., compression)
    """
    if "r" not in mode:
        raise ValueError("Can only ensure local files when reading")
    of = open_files(url, mode=mode, **storage_options)
    if not getattr(of[0].fs, "local_file", False):
        raise ValueError(
            "open_local can only be used on a filesystem which"
            " has attribute local_file=True"
        )
    with of as files:
        paths = [f.name for f in files]
    if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
        return paths[0]
    return paths


def get_compression(urlpath, compression):
    if compression == "infer":
        compression = infer_compression(urlpath)
    if compression is not None and compression not in compr:
        raise ValueError(f"Compression type {compression} not supported")
    return compression


def split_protocol(urlpath):
    """Return protocol, path pair"""
    urlpath = stringify_path(urlpath)
    if "://" in urlpath:
        protocol, path = urlpath.split("://", 1)
        if len(protocol) > 1:
            # excludes Windows paths
            return protocol, path
    if urlpath.startswith("data:"):
        return urlpath.split(":", 1)
    return None, urlpath


def strip_protocol(urlpath):
    """Return only path part of full URL, according to appropriate backend"""
    protocol, _ = split_protocol(urlpath)
    cls = get_filesystem_class(protocol)
    return cls._strip_protocol(urlpath)


def expand_paths_if_needed(paths, mode, num, fs, name_function):
    """Expand paths if they have a ``*`` in them (write mode) or any of ``*?[]``
    in them (read mode).

    :param paths: list of paths
    mode: str
        Mode in which to open files.
    num: int
        If opening in writing mode, number of files we expect to create.
    fs: filesystem object
    name_function: callable
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.
    :return: list of paths
    """
    expanded_paths = []
    paths = list(paths)

    if "w" in mode:  # write mode
        if sum(1 for p in paths if "*" in p) > 1:
            raise ValueError(
                "When writing data, only one filename mask can be specified."
            )
        num = max(num, len(paths))

        for curr_path in paths:
            if "*" in curr_path:
                # expand using name_function
                expanded_paths.extend(_expand_paths(curr_path, name_function, num))
            else:
                expanded_paths.append(curr_path)
        # if we generated more paths than asked for, trim the list
        if len(expanded_paths) > num:
            expanded_paths = expanded_paths[:num]

    else:  # read mode
        for curr_path in paths:
            if has_magic(curr_path):
                # expand using glob
                expanded_paths.extend(fs.glob(curr_path))
            else:
                expanded_paths.append(curr_path)

    return expanded_paths


def get_fs_token_paths(
    urlpath,
    mode="rb",
    num=1,
    name_function=None,
    storage_options=None,
    protocol=None,
    expand=True,
):
    """Filesystem, deterministic token, and paths from a urlpath and options.

    Parameters
    ----------
    urlpath: string or iterable
        Absolute or relative filepath, URL (may include protocols like
        ``s3://``), or globstring pointing to data.
    mode: str, optional
        Mode in which to open files.
    num: int, optional
        If opening in writing mode, number of files we expect to create.
    name_function: callable, optional
        If opening in writing mode, this callable is used to generate path
        names. Names are generated for each partition by
        ``urlpath.replace('*', name_function(partition_index))``.
    storage_options: dict, optional
        Additional keywords to pass to the filesystem class.
    protocol: str or None
        To override the protocol specifier in the URL
    expand: bool
        Expand string paths for writing, assuming the path is a directory
    """
    if isinstance(urlpath, (list, tuple, set)):
        if not urlpath:
            raise ValueError("empty urlpath sequence")
        urlpath0 = stringify_path(next(iter(urlpath)))
    else:
        urlpath0 = stringify_path(urlpath)
    storage_options = storage_options or {}
    if protocol:
        storage_options["protocol"] = protocol
    chain = _un_chain(urlpath0, storage_options or {})
    inkwargs = {}
    # Reverse iterate the chain, creating a nested target_* structure
    for i, ch in enumerate(reversed(chain)):
        urls, nested_protocol, kw = ch
        if i == len(chain) - 1:
            inkwargs = dict(**kw, **inkwargs)
            continue
        inkwargs["target_options"] = dict(**kw, **inkwargs)
        inkwargs["target_protocol"] = nested_protocol
        inkwargs["fo"] = urls
    paths, protocol, _ = chain[0]
    fs = filesystem(protocol, **inkwargs)
    if isinstance(urlpath, (list, tuple, set)):
        pchains = [
            _un_chain(stringify_path(u), storage_options or {})[0] for u in urlpath
        ]
        if len({pc[1] for pc in pchains}) > 1:
            raise ValueError(f"Protocol mismatch getting fs from {urlpath}")
        paths = [pc[0] for pc in pchains]
    else:
        paths = fs._strip_protocol(paths)
    if isinstance(paths, (list, tuple, set)):
        if expand:
            paths = expand_paths_if_needed(paths, mode, num, fs, name_function)
        elif not isinstance(paths, list):
            paths = list(paths)
    else:
        if ("w" in mode or "x" in mode) and expand:
            paths = _expand_paths(paths, name_function, num)
        elif "*" in paths:
            paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
        else:
            paths = [paths]

    return fs, fs._fs_token, paths


def _expand_paths(path, name_function, num):
    if isinstance(path, str):
        if path.count("*") > 1:
            raise ValueError("Output path spec must contain exactly one '*'.")
        elif "*" not in path:
            path = os.path.join(path, "*.part")

        if name_function is None:
            name_function = build_name_function(num - 1)

        paths = [path.replace("*", name_function(i)) for i in range(num)]
        if paths != sorted(paths):
            logger.warning(
                "In order to preserve order between partitions"
                " paths created with ``name_function`` should "
                "sort to partition order"
            )
    elif isinstance(path, (tuple, list)):
        assert len(path) == num
        paths = list(path)
    else:
        raise ValueError(
            "Path should be either\n"
            "1. A list of paths: ['foo.json', 'bar.json', ...]\n"
            "2. A directory: 'foo/'\n"
            "3. A path with a '*' in it: 'foo.*.json'"
        )
    return paths


class PickleableTextIOWrapper(io.TextIOWrapper):
    """TextIOWrapper cannot be pickled. This solves it.

    Requires that ``buffer`` be pickleable, which all instances of
    AbstractBufferedFile are.
    """

    def __init__(
        self,
        buffer,
        encoding=None,
        errors=None,
        newline=None,
        line_buffering=False,
        write_through=False,
    ):
        self.args = buffer, encoding, errors, newline, line_buffering, write_through
        super().__init__(*self.args)

    def __reduce__(self):
        return PickleableTextIOWrapper, self.args
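
To make the ``OpenFile`` lifecycle and the ``::`` chaining handled by ``_un_chain`` concrete, a sketch using the in-memory filesystem so it runs anywhere; the paths are illustrative:

    import fsspec
    from fsspec.core import url_to_fs

    # open() returns a serializable OpenFile; the low-level file is only
    # created when the context is entered
    with fsspec.open("memory://logs/a.txt", "wt") as f:
        f.write("hello")

    fs, path = url_to_fs("memory://logs/a.txt")
    print(fs.protocol, path)  # memory /logs/a.txt

    # a chained URL: the outer caching filesystem receives the inner one
    # via the target_protocol/target_options structure built above
    fs2, path2 = url_to_fs("simplecache::memory://logs/a.txt")
    print(type(fs2).__name__)  # SimpleCacheFileSystem
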
venv/lib/python3.12/site-packages/fsspec/dircache.py
ADDED
@@ -0,0 +1,98 @@
import time
from collections.abc import MutableMapping
from functools import lru_cache


class DirCache(MutableMapping):
    """
    Caching of directory listings, in a structure like::

        {"path0": [
            {"name": "path0/file0",
             "size": 123,
             "type": "file",
             ...
            },
            {"name": "path0/file1",
            },
            ...
            ],
         "path1": [...]
        }

    Parameters to this class control listing expiry or indeed turn
    caching off
    """

    def __init__(
        self,
        use_listings_cache=True,
        listings_expiry_time=None,
        max_paths=None,
        **kwargs,
    ):
        """

        Parameters
        ----------
        use_listings_cache: bool
            If False, this cache never returns items, but always reports KeyError,
            and setting items has no effect
        listings_expiry_time: int or float (optional)
            Time in seconds that a listing is considered valid. If None,
            listings do not expire.
        max_paths: int (optional)
            The number of most recent listings that are considered valid; 'recent'
            refers to when the entry was set.
        """
        self._cache = {}
        self._times = {}
        if max_paths:
            self._q = lru_cache(max_paths + 1)(lambda key: self._cache.pop(key, None))
        self.use_listings_cache = use_listings_cache
        self.listings_expiry_time = listings_expiry_time
        self.max_paths = max_paths

    def __getitem__(self, item):
        if self.listings_expiry_time is not None:
            if self._times.get(item, 0) - time.time() < -self.listings_expiry_time:
                del self._cache[item]
        if self.max_paths:
            self._q(item)
        return self._cache[item]  # maybe raises KeyError

    def clear(self):
        self._cache.clear()

    def __len__(self):
        return len(self._cache)

    def __contains__(self, item):
        try:
            self[item]
            return True
        except KeyError:
            return False

    def __setitem__(self, key, value):
        if not self.use_listings_cache:
            return
        if self.max_paths:
            self._q(key)
        self._cache[key] = value
        if self.listings_expiry_time is not None:
            self._times[key] = time.time()

    def __delitem__(self, key):
        del self._cache[key]

    def __iter__(self):
        entries = list(self._cache)

        return (k for k in entries if k in self)

    def __reduce__(self):
        return (
            DirCache,
            (self.use_listings_cache, self.listings_expiry_time, self.max_paths),
        )
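
A sketch of the expiry behaviour: entries disappear once ``listings_expiry_time`` has elapsed; the listing content here is invented:

    import time
    from fsspec.dircache import DirCache

    cache = DirCache(listings_expiry_time=0.1)
    cache["/data"] = [{"name": "/data/file0", "size": 1, "type": "file"}]
    assert "/data" in cache
    time.sleep(0.2)
    assert "/data" not in cache  # __getitem__ evicted the stale entry
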
venv/lib/python3.12/site-packages/fsspec/exceptions.py
ADDED
@@ -0,0 +1,18 @@
"""
fsspec user-defined exception classes
"""

import asyncio


class BlocksizeMismatchError(ValueError):
    """
    Raised when a cached file is opened with a different blocksize than it was
    written with
    """


class FSTimeoutError(asyncio.TimeoutError):
    """
    Raised when an fsspec function timeout occurs
    """
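
Since ``FSTimeoutError`` subclasses ``asyncio.TimeoutError``, existing handlers keep working; a minimal sketch:

    import asyncio
    from fsspec.exceptions import FSTimeoutError

    try:
        raise FSTimeoutError("operation took too long")
    except asyncio.TimeoutError:
        print("caught via the asyncio base class")
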
venv/lib/python3.12/site-packages/fsspec/fuse.py
ADDED
@@ -0,0 +1,324 @@
import argparse
import logging
import os
import stat
import threading
import time
from errno import EIO, ENOENT

from fuse import FUSE, FuseOSError, LoggingMixIn, Operations

from fsspec import __version__
from fsspec.core import url_to_fs

logger = logging.getLogger("fsspec.fuse")


class FUSEr(Operations):
    def __init__(self, fs, path, ready_file=False):
        self.fs = fs
        self.cache = {}
        self.root = path.rstrip("/") + "/"
        self.counter = 0
        logger.info("Starting FUSE at %s", path)
        self._ready_file = ready_file

    def getattr(self, path, fh=None):
        logger.debug("getattr %s", path)
        if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]:
            return {"type": "file", "st_size": 5}

        path = "".join([self.root, path.lstrip("/")]).rstrip("/")
        try:
            info = self.fs.info(path)
        except FileNotFoundError as exc:
            raise FuseOSError(ENOENT) from exc

        data = {"st_uid": info.get("uid", 1000), "st_gid": info.get("gid", 1000)}
        perm = info.get("mode", 0o777)

        if info["type"] != "file":
            data["st_mode"] = stat.S_IFDIR | perm
            data["st_size"] = 0
            data["st_blksize"] = 0
        else:
            data["st_mode"] = stat.S_IFREG | perm
            data["st_size"] = info["size"]
            data["st_blksize"] = 5 * 2**20
            data["st_nlink"] = 1
        data["st_atime"] = info["atime"] if "atime" in info else time.time()
        data["st_ctime"] = info["ctime"] if "ctime" in info else time.time()
        data["st_mtime"] = info["mtime"] if "mtime" in info else time.time()
        return data

    def readdir(self, path, fh):
        logger.debug("readdir %s", path)
        path = "".join([self.root, path.lstrip("/")])
        files = self.fs.ls(path, False)
        files = [os.path.basename(f.rstrip("/")) for f in files]
        return [".", ".."] + files

    def mkdir(self, path, mode):
        path = "".join([self.root, path.lstrip("/")])
        self.fs.mkdir(path)
        return 0

    def rmdir(self, path):
        path = "".join([self.root, path.lstrip("/")])
        self.fs.rmdir(path)
        return 0

    def read(self, path, size, offset, fh):
        logger.debug("read %s", (path, size, offset))
        if self._ready_file and path in ["/.fuse_ready", ".fuse_ready"]:
            # status indicator
            return b"ready"

        f = self.cache[fh]
        f.seek(offset)
        out = f.read(size)
        return out

    def write(self, path, data, offset, fh):
        logger.debug("write %s", (path, offset))
        f = self.cache[fh]
        f.seek(offset)
        f.write(data)
        return len(data)

    def create(self, path, flags, fi=None):
        logger.debug("create %s", (path, flags))
        fn = "".join([self.root, path.lstrip("/")])
        self.fs.touch(fn)  # OS will want to get attributes immediately
        f = self.fs.open(fn, "wb")
        self.cache[self.counter] = f
        self.counter += 1
        return self.counter - 1

    def open(self, path, flags):
        logger.debug("open %s", (path, flags))
        fn = "".join([self.root, path.lstrip("/")])
        if flags % 2 == 0:
            # read
            mode = "rb"
        else:
            # write/create
            mode = "wb"
        self.cache[self.counter] = self.fs.open(fn, mode)
        self.counter += 1
        return self.counter - 1

    def truncate(self, path, length, fh=None):
        fn = "".join([self.root, path.lstrip("/")])
        if length != 0:
            raise NotImplementedError
        # maybe should be no-op since open with write sets size to zero anyway
        self.fs.touch(fn)

    def unlink(self, path):
        fn = "".join([self.root, path.lstrip("/")])
        try:
            self.fs.rm(fn, False)
        except (OSError, FileNotFoundError) as exc:
            raise FuseOSError(EIO) from exc

    def release(self, path, fh):
        try:
            if fh in self.cache:
                f = self.cache[fh]
                f.close()
                self.cache.pop(fh)
        except Exception as e:
            print(e)
        return 0

    def chmod(self, path, mode):
        if hasattr(self.fs, "chmod"):
            path = "".join([self.root, path.lstrip("/")])
            return self.fs.chmod(path, mode)
        raise NotImplementedError


def run(
    fs,
    path,
    mount_point,
    foreground=True,
    threads=False,
    ready_file=False,
    ops_class=FUSEr,
):
    """Mount stuff in a local directory

    This uses fusepy to make it appear as if a given path on an fsspec
    instance is in fact resident within the local file-system.

    This requires that fusepy be installed, and that FUSE be available on
    the system (typically requiring a package to be installed with
    apt, yum, brew, etc.).

    Parameters
    ----------
    fs: file-system instance
        From one of the compatible implementations
    path: str
        Location on that file-system to regard as the root directory to
        mount. Note that you typically should include the terminating "/"
        character.
    mount_point: str
        An empty directory on the local file-system where the contents of
        the remote path will appear.
    foreground: bool
        Whether or not calling this function will block. Operation will
        typically be more stable if True.
    threads: bool
        Whether or not to create threads when responding to file operations
        within the mounted directory. Operation will typically be more
        stable if False.
    ready_file: bool
        Whether the FUSE process is ready. The ``.fuse_ready`` file will
        exist in the ``mount_point`` directory if True. Debugging purpose.
    ops_class: FUSEr or subclass of FUSEr
        To override the default behavior of FUSEr. For example, logging
        to file.

    """
    func = lambda: FUSE(
        ops_class(fs, path, ready_file=ready_file),
        mount_point,
        nothreads=not threads,
        foreground=foreground,
    )
    if not foreground:
        th = threading.Thread(target=func)
        th.daemon = True
        th.start()
        return th
    else:  # pragma: no cover
        try:
            func()
        except KeyboardInterrupt:
            pass


def main(args):
    """Mount filesystem from chained URL to MOUNT_POINT.

    Examples:

    python3 -m fsspec.fuse memory /usr/share /tmp/mem

    python3 -m fsspec.fuse local /tmp/source /tmp/local \\
            -l /tmp/fsspecfuse.log

    You can also mount chained-URLs and use special settings:

    python3 -m fsspec.fuse 'filecache::zip::file://data.zip' \\
            / /tmp/zip \\
            -o 'filecache-cache_storage=/tmp/simplecache'

    You can specify the type of the setting by using `[int]` or `[bool]`,
    (`true`, `yes`, `1` represent the Boolean value `True`):

    python3 -m fsspec.fuse 'simplecache::ftp://ftp1.at.proftpd.org' \\
            /historic/packages/RPMS /tmp/ftp \\
            -o 'simplecache-cache_storage=/tmp/simplecache' \\
            -o 'simplecache-check_files=false[bool]' \\
            -o 'ftp-listings_expiry_time=60[int]' \\
            -o 'ftp-username=anonymous' \\
            -o 'ftp-password=xieyanbo'
    """

    class RawDescriptionArgumentParser(argparse.ArgumentParser):
        def format_help(self):
            usage = super().format_help()
            parts = usage.split("\n\n")
            parts[1] = self.description.rstrip()
            return "\n\n".join(parts)

    parser = RawDescriptionArgumentParser(prog="fsspec.fuse", description=main.__doc__)
    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument("url", type=str, help="fs url")
    parser.add_argument("source_path", type=str, help="source directory in fs")
    parser.add_argument("mount_point", type=str, help="local directory")
    parser.add_argument(
        "-o",
        "--option",
        action="append",
        help="Any options of protocol included in the chained URL",
    )
    parser.add_argument(
        "-l", "--log-file", type=str, help="Logging FUSE debug info (Default: '')"
    )
    parser.add_argument(
        "-f",
        "--foreground",
        action="store_false",
        help="Running in foreground or not (Default: False)",
    )
    parser.add_argument(
        "-t",
        "--threads",
        action="store_false",
        help="Running with threads support (Default: False)",
    )
    parser.add_argument(
        "-r",
        "--ready-file",
        action="store_false",
        help="The `.fuse_ready` file will exist after FUSE is ready. "
        "(Debugging purpose, Default: False)",
    )
    args = parser.parse_args(args)

    kwargs = {}
    for item in args.option or []:
        key, sep, value = item.partition("=")
        if not sep:
            parser.error(message=f"Wrong option: {item!r}")
        val = value.lower()
        if val.endswith("[int]"):
            value = int(value[: -len("[int]")])
        elif val.endswith("[bool]"):
            value = val[: -len("[bool]")] in ["1", "yes", "true"]

        if "-" in key:
            fs_name, setting_name = key.split("-", 1)
            if fs_name in kwargs:
                kwargs[fs_name][setting_name] = value
            else:
                kwargs[fs_name] = {setting_name: value}
        else:
            kwargs[key] = value

    if args.log_file:
        logging.basicConfig(
            level=logging.DEBUG,
            filename=args.log_file,
            format="%(asctime)s %(message)s",
        )

        class LoggingFUSEr(FUSEr, LoggingMixIn):
            pass

        fuser = LoggingFUSEr
    else:
        fuser = FUSEr

    fs, url_path = url_to_fs(args.url, **kwargs)
    logger.debug("Mounting %s to %s", url_path, str(args.mount_point))
    run(
        fs,
        args.source_path,
        args.mount_point,
        foreground=args.foreground,
        threads=args.threads,
        ready_file=args.ready_file,
        ops_class=fuser,
    )


if __name__ == "__main__":
    import sys

    main(sys.argv[1:])
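Usage note (not part of the vendored file): `run()` above is the entry point for mounting programmatically. The following is a minimal sketch, assuming fusepy and a system FUSE library are installed; the filesystem choice and paths are illustrative only, not taken from this diff.

# Sketch: mount an in-memory fsspec filesystem at a local directory.
# Assumes fusepy + libfuse are available; /tmp/mnt must be an existing
# empty directory. All names here are illustrative.
import fsspec
from fsspec.fuse import run

fs = fsspec.filesystem("memory")
fs.pipe_file("/data/hello.txt", b"hello world")

# foreground=False returns the daemon thread driving the FUSE loop,
# so the mount can be used from the same process.
th = run(fs, "/data", "/tmp/mnt", foreground=False)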
venv/lib/python3.12/site-packages/fsspec/generic.py
ADDED
@@ -0,0 +1,395 @@
from __future__ import annotations

import inspect
import logging
import os
import shutil
import uuid
from typing import Optional

from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper
from .callbacks import DEFAULT_CALLBACK
from .core import filesystem, get_filesystem_class, split_protocol, url_to_fs

_generic_fs = {}
logger = logging.getLogger("fsspec.generic")


def set_generic_fs(protocol, **storage_options):
    """Populate the dict used for method=="generic" lookups"""
    _generic_fs[protocol] = filesystem(protocol, **storage_options)


def _resolve_fs(url, method, protocol=None, storage_options=None):
    """Pick instance of backend FS"""
    url = url[0] if isinstance(url, (list, tuple)) else url
    protocol = protocol or split_protocol(url)[0]
    storage_options = storage_options or {}
    if method == "default":
        return filesystem(protocol)
    if method == "generic":
        return _generic_fs[protocol]
    if method == "current":
        cls = get_filesystem_class(protocol)
        return cls.current()
    if method == "options":
        fs, _ = url_to_fs(url, **storage_options.get(protocol, {}))
        return fs
    raise ValueError(f"Unknown FS resolution method: {method}")


def rsync(
    source,
    destination,
    delete_missing=False,
    source_field="size",
    dest_field="size",
    update_cond="different",
    inst_kwargs=None,
    fs=None,
    **kwargs,
):
    """Sync files between two directory trees

    (experimental)

    Parameters
    ----------
    source: str
        Root of the directory tree to take files from. This must be a directory, but
        do not include any terminating "/" character
    destination: str
        Root path to copy into. The contents of this location should be
        identical to the contents of ``source`` when done. This will be made a
        directory, and the terminal "/" should not be included.
    delete_missing: bool
        If there are paths in the destination that don't exist in the
        source and this is True, delete them. Otherwise, leave them alone.
    source_field: str | callable
        If ``update_cond`` is "different", this is the key in the info
        of source files to consider for difference. May be a function of the
        info dict.
    dest_field: str | callable
        If ``update_cond`` is "different", this is the key in the info
        of destination files to consider for difference. May be a function of
        the info dict.
    update_cond: "different"|"always"|"never"
        If "always", every file is copied, regardless of whether it exists in
        the destination. If "never", files that exist in the destination are
        not copied again. If "different" (default), only copy if the info
        fields given by ``source_field`` and ``dest_field`` (usually "size")
        are different. Other comparisons may be added in the future.
    inst_kwargs: dict|None
        If ``fs`` is None, use this set of keyword arguments to make a
        GenericFileSystem instance
    fs: GenericFileSystem|None
        Instance to use if explicitly given. The instance defines how to
        make downstream file system instances from paths.

    Returns
    -------
    dict of the copy operations that were performed, {source: destination}
    """
    fs = fs or GenericFileSystem(**(inst_kwargs or {}))
    source = fs._strip_protocol(source)
    destination = fs._strip_protocol(destination)
    allfiles = fs.find(source, withdirs=True, detail=True)
    if not fs.isdir(source):
        raise ValueError("Can only rsync on a directory")
    otherfiles = fs.find(destination, withdirs=True, detail=True)
    dirs = [
        a
        for a, v in allfiles.items()
        if v["type"] == "directory" and a.replace(source, destination) not in otherfiles
    ]
    logger.debug(f"{len(dirs)} directories to create")
    if dirs:
        fs.make_many_dirs(
            [dirn.replace(source, destination) for dirn in dirs], exist_ok=True
        )
    allfiles = {a: v for a, v in allfiles.items() if v["type"] == "file"}
    logger.debug(f"{len(allfiles)} files to consider for copy")
    to_delete = [
        o
        for o, v in otherfiles.items()
        if o.replace(destination, source) not in allfiles and v["type"] == "file"
    ]
    for k, v in allfiles.copy().items():
        otherfile = k.replace(source, destination)
        if otherfile in otherfiles:
            if update_cond == "always":
                allfiles[k] = otherfile
            elif update_cond == "different":
                inf1 = source_field(v) if callable(source_field) else v[source_field]
                v2 = otherfiles[otherfile]
                inf2 = dest_field(v2) if callable(dest_field) else v2[dest_field]
                if inf1 != inf2:
                    # details mismatch, make copy
                    allfiles[k] = otherfile
                else:
                    # details match, don't copy
                    allfiles.pop(k)
        else:
            # file not in target yet
            allfiles[k] = otherfile
    logger.debug(f"{len(allfiles)} files to copy")
    if allfiles:
        source_files, target_files = zip(*allfiles.items())
        fs.cp(source_files, target_files, **kwargs)
    logger.debug(f"{len(to_delete)} files to delete")
    if delete_missing and to_delete:
        fs.rm(to_delete)
    return allfiles


class GenericFileSystem(AsyncFileSystem):
    """Wrapper over all other FS types

    <experimental!>

    This implementation is a single unified interface to be able to run FS operations
    over generic URLs, and dispatch to the specific implementations using the URL
    protocol prefix.

    Note: instances of this FS are always async, even if you never use it with any
    async backend.
    """

    protocol = "generic"  # there is no real reason to ever use a protocol with this FS

    def __init__(self, default_method="default", storage_options=None, **kwargs):
        """

        Parameters
        ----------
        default_method: str (optional)
            Defines how to configure backend FS instances. Options are:

            - "default": instantiate like FSClass(), with no
              extra arguments; this is the default instance of that FS, and can be
              configured via the config system
            - "generic": takes instances from the `_generic_fs` dict in this module,
              which you must populate before use. Keys are by protocol
            - "options": expects storage_options, a dict mapping protocol to
              kwargs to use when constructing the filesystem
            - "current": takes the most recently instantiated version of each FS
        """
        self.method = default_method
        self.st_opts = storage_options
        super().__init__(**kwargs)

    def _parent(self, path):
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        return fs.unstrip_protocol(fs._parent(path))

    def _strip_protocol(self, path):
        # normalization only
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        return fs.unstrip_protocol(fs._strip_protocol(path))

    async def _find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        if fs.async_impl:
            out = await fs._find(
                path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
            )
        else:
            out = fs.find(
                path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
            )
        result = {}
        for k, v in out.items():
            v = v.copy()  # don't corrupt target FS dircache
            name = fs.unstrip_protocol(k)
            v["name"] = name
            result[name] = v
        if detail:
            return result
        return list(result)

    async def _info(self, url, **kwargs):
        fs = _resolve_fs(url, self.method)
        if fs.async_impl:
            out = await fs._info(url, **kwargs)
        else:
            out = fs.info(url, **kwargs)
        out = out.copy()  # don't edit originals
        out["name"] = fs.unstrip_protocol(out["name"])
        return out

    async def _ls(
        self,
        url,
        detail=True,
        **kwargs,
    ):
        fs = _resolve_fs(url, self.method)
        if fs.async_impl:
            out = await fs._ls(url, detail=True, **kwargs)
        else:
            out = fs.ls(url, detail=True, **kwargs)
        out = [o.copy() for o in out]  # don't edit originals
        for o in out:
            o["name"] = fs.unstrip_protocol(o["name"])
        if detail:
            return out
        else:
            return [o["name"] for o in out]

    async def _cat_file(
        self,
        url,
        **kwargs,
    ):
        fs = _resolve_fs(url, self.method)
        if fs.async_impl:
            return await fs._cat_file(url, **kwargs)
        else:
            return fs.cat_file(url, **kwargs)

    async def _pipe_file(
        self,
        path,
        value,
        **kwargs,
    ):
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        if fs.async_impl:
            return await fs._pipe_file(path, value, **kwargs)
        else:
            return fs.pipe_file(path, value, **kwargs)

    async def _rm(self, url, **kwargs):
        urls = url
        if isinstance(urls, str):
            urls = [urls]
        fs = _resolve_fs(urls[0], self.method)
        if fs.async_impl:
            await fs._rm(urls, **kwargs)
        else:
            fs.rm(url, **kwargs)

    async def _makedirs(self, path, exist_ok=False):
        logger.debug("Make dir %s", path)
        fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
        if fs.async_impl:
            await fs._makedirs(path, exist_ok=exist_ok)
        else:
            fs.makedirs(path, exist_ok=exist_ok)

    def rsync(self, source, destination, **kwargs):
        """Sync files between two directory trees

        See `func:rsync` for more details.
        """
        rsync(source, destination, fs=self, **kwargs)

    async def _cp_file(
        self,
        url,
        url2,
        blocksize=2**20,
        callback=DEFAULT_CALLBACK,
        tempdir: Optional[str] = None,
        **kwargs,
    ):
        fs = _resolve_fs(url, self.method)
        fs2 = _resolve_fs(url2, self.method)
        if fs is fs2:
            # pure remote
            if fs.async_impl:
                return await fs._copy(url, url2, **kwargs)
            else:
                return fs.copy(url, url2, **kwargs)
        await copy_file_op(fs, [url], fs2, [url2], tempdir, 1, on_error="raise")

    async def _make_many_dirs(self, urls, exist_ok=True):
        fs = _resolve_fs(urls[0], self.method)
        if fs.async_impl:
            coros = [fs._makedirs(u, exist_ok=exist_ok) for u in urls]
            await _run_coros_in_chunks(coros)
        else:
            for u in urls:
                fs.makedirs(u, exist_ok=exist_ok)

    make_many_dirs = sync_wrapper(_make_many_dirs)

    async def _copy(
        self,
        path1: list[str],
        path2: list[str],
        recursive: bool = False,
        on_error: str = "ignore",
        maxdepth: Optional[int] = None,
        batch_size: Optional[int] = None,
        tempdir: Optional[str] = None,
        **kwargs,
    ):
        # TODO: special case for one FS being local, which can use get/put
        # TODO: special case for one being memFS, which can use cat/pipe
        if recursive:
            raise NotImplementedError("Please use fsspec.generic.rsync")
        path1 = [path1] if isinstance(path1, str) else path1
        path2 = [path2] if isinstance(path2, str) else path2

        fs = _resolve_fs(path1, self.method)
        fs2 = _resolve_fs(path2, self.method)

        if fs is fs2:
            if fs.async_impl:
                return await fs._copy(path1, path2, **kwargs)
            else:
                return fs.copy(path1, path2, **kwargs)

        await copy_file_op(
            fs, path1, fs2, path2, tempdir, batch_size, on_error=on_error
        )


async def copy_file_op(
    fs1, url1, fs2, url2, tempdir=None, batch_size=20, on_error="ignore"
):
    import tempfile

    tempdir = tempdir or tempfile.mkdtemp()
    try:
        coros = [
            _copy_file_op(
                fs1,
                u1,
                fs2,
                u2,
                os.path.join(tempdir, uuid.uuid4().hex),
            )
            for u1, u2 in zip(url1, url2)
        ]
        out = await _run_coros_in_chunks(
            coros, batch_size=batch_size, return_exceptions=True
        )
    finally:
        shutil.rmtree(tempdir)
    if on_error == "return":
        return out
    elif on_error == "raise":
        for o in out:
            if isinstance(o, Exception):
                raise o


async def _copy_file_op(fs1, url1, fs2, url2, local, on_error="ignore"):
    if fs1.async_impl:
        await fs1._get_file(url1, local)
    else:
        fs1.get_file(url1, local)
    if fs2.async_impl:
        await fs2._put_file(local, url2)
    else:
        fs2.put_file(local, url2)
    os.unlink(local)
    logger.debug("Copy %s -> %s; done", url1, url2)


async def maybe_await(cor):
    if inspect.iscoroutine(cor):
        return await cor
    else:
        return cor
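Usage note (not part of the vendored file): a minimal sketch of the generic interface above, assuming the "default" instantiation method; the protocols and paths are illustrative and exact behavior depends on the backends involved. `rsync` compares the "size" info field by default and returns the mapping of copies it performed.

# Sketch: one-way sync from a memory filesystem to local disk through
# GenericFileSystem. Illustrative only.
import fsspec
from fsspec.generic import GenericFileSystem, rsync

mem = fsspec.filesystem("memory")
mem.pipe_file("/src/a.txt", b"aaa")
mem.pipe_file("/src/sub/b.txt", b"bbb")

gfs = GenericFileSystem(default_method="default")
copied = rsync("memory://src", "file:///tmp/dest", fs=gfs)
print(copied)  # {source_path: destination_path} for files actually copied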
venv/lib/python3.12/site-packages/fsspec/gui.py
ADDED
@@ -0,0 +1,416 @@
import ast
import contextlib
import logging
import os
import re
from typing import ClassVar, Sequence

import panel as pn

from .core import OpenFile, get_filesystem_class, split_protocol
from .registry import known_implementations

pn.extension()
logger = logging.getLogger("fsspec.gui")


class SigSlot:
    """Signal-slot mixin, for Panel event passing

    Include this class in a widget manager's superclasses to be able to
    register events and callbacks on Panel widgets managed by that class.

    The method ``_register`` should be called as widgets are added, and external
    code should call ``connect`` to associate callbacks.

    By default, all signals emit a DEBUG logging statement.
    """

    # names of signals that this class may emit, each of which must be
    # set by _register for any new instance
    signals: ClassVar[Sequence[str]] = []
    # names of actions that this class may respond to, each of which
    # must be a method name
    slots: ClassVar[Sequence[str]] = []

    def __init__(self):
        self._ignoring_events = False
        self._sigs = {}
        self._map = {}
        self._setup()

    def _setup(self):
        """Create GUI elements and register signals"""
        self.panel = pn.pane.PaneBase()
        # no signals to set up in the base class

    def _register(
        self, widget, name, thing="value", log_level=logging.DEBUG, auto=False
    ):
        """Watch the given attribute of a widget and assign it a named event

        This is normally called at the time a widget is instantiated, in the
        class which owns it.

        Parameters
        ----------
        widget : pn.layout.Panel or None
            Widget to watch. If None, an anonymous signal not associated with
            any widget.
        name : str
            Name of this event
        thing : str
            Attribute of the given widget to watch
        log_level : int
            When the signal is triggered, a logging event of the given level
            will be fired in the dfviz logger.
        auto : bool
            If True, automatically connects with a method in this class of the
            same name.
        """
        if name not in self.signals:
            raise ValueError(f"Attempt to assign an undeclared signal: {name}")
        self._sigs[name] = {
            "widget": widget,
            "callbacks": [],
            "thing": thing,
            "log": log_level,
        }
        wn = "-".join(
            [
                getattr(widget, "name", str(widget)) if widget is not None else "none",
                thing,
            ]
        )
        self._map[wn] = name
        if widget is not None:
            widget.param.watch(self._signal, thing, onlychanged=True)
        if auto and hasattr(self, name):
            self.connect(name, getattr(self, name))

    def _repr_mimebundle_(self, *args, **kwargs):
        """Display in a notebook or a server"""
        try:
            return self.panel._repr_mimebundle_(*args, **kwargs)
        except (ValueError, AttributeError) as exc:
            raise NotImplementedError(
                "Panel does not seem to be set up properly"
            ) from exc

    def connect(self, signal, slot):
        """Associate callback with given event

        The callback must be a function which takes the "new" value of the
        watched attribute as the only parameter. If the callback returns False,
        this cancels any further processing of the given event.

        Alternatively, the callback can be a string, in which case it means
        emitting the correspondingly-named event (i.e., connect to self)
        """
        self._sigs[signal]["callbacks"].append(slot)

    def _signal(self, event):
        """This is called by an action on a widget

        Within a self.ignore_events context, nothing happens.

        Tests can execute this method by directly changing the values of
        widget components.
        """
        if not self._ignoring_events:
            wn = "-".join([event.obj.name, event.name])
            if wn in self._map and self._map[wn] in self._sigs:
                self._emit(self._map[wn], event.new)

    @contextlib.contextmanager
    def ignore_events(self):
        """Temporarily turn off events processing in this instance

        (does not propagate to children)
        """
        self._ignoring_events = True
        try:
            yield
        finally:
            self._ignoring_events = False

    def _emit(self, sig, value=None):
        """An event happened, call its callbacks

        This method can be used in tests to simulate message passing without
        directly changing visual elements.

        Calling of callbacks will halt whenever one returns False.
        """
        logger.log(self._sigs[sig]["log"], f"{sig}: {value}")
        for callback in self._sigs[sig]["callbacks"]:
            if isinstance(callback, str):
                self._emit(callback)
            else:
                try:
                    # running callbacks should not break the interface
                    ret = callback(value)
                    if ret is False:
                        break
                except Exception as e:
                    logger.exception(
                        "Exception (%s) while executing callback for signal: %s",
                        e,
                        sig,
                    )

    def show(self, threads=False):
        """Open a new browser tab and display this instance's interface"""
        self.panel.show(threads=threads, verbose=False)
        return self


class SingleSelect(SigSlot):
    """A multiselect which only allows you to select one item for an event"""

    signals = ["_selected", "selected"]  # the first is internal
    slots = ["set_options", "set_selection", "add", "clear", "select"]

    def __init__(self, **kwargs):
        self.kwargs = kwargs
        super().__init__()

    def _setup(self):
        self.panel = pn.widgets.MultiSelect(**self.kwargs)
        self._register(self.panel, "_selected", "value")
        self._register(None, "selected")
        self.connect("_selected", self.select_one)

    def _signal(self, *args, **kwargs):
        super()._signal(*args, **kwargs)

    def select_one(self, *_):
        with self.ignore_events():
            val = [self.panel.value[-1]] if self.panel.value else []
            self.panel.value = val
        self._emit("selected", self.panel.value)

    def set_options(self, options):
        self.panel.options = options

    def clear(self):
        self.panel.options = []

    @property
    def value(self):
        return self.panel.value

    def set_selection(self, selection):
        self.panel.value = [selection]


class FileSelector(SigSlot):
    """Panel-based graphical file selector widget

    Instances of this widget are interactive and can be displayed in jupyter by having
    them as the output of a cell, or in a separate browser tab using ``.show()``.
    """

    signals = [
        "protocol_changed",
        "selection_changed",
        "directory_entered",
        "home_clicked",
        "up_clicked",
        "go_clicked",
        "filters_changed",
    ]
    slots = ["set_filters", "go_home"]

    def __init__(self, url=None, filters=None, ignore=None, kwargs=None):
        """

        Parameters
        ----------
        url : str (optional)
            Initial value of the URL to populate the dialog; should include protocol
        filters : list(str) (optional)
            File endings to include in the listings. If not included, all files are
            allowed. Does not affect directories.
            If given, the endings will appear as checkboxes in the interface
        ignore : list(str) (optional)
            Regex(s) of file basename patterns to ignore, e.g., "\\." for typical
            hidden files on posix
        kwargs : dict (optional)
            To pass to file system instance
        """
        if url:
            self.init_protocol, url = split_protocol(url)
        else:
            self.init_protocol, url = "file", os.getcwd()
        self.init_url = url
        self.init_kwargs = (kwargs if isinstance(kwargs, str) else str(kwargs)) or "{}"
        self.filters = filters
        self.ignore = [re.compile(i) for i in ignore or []]
        self._fs = None
        super().__init__()

    def _setup(self):
        self.url = pn.widgets.TextInput(
            name="url",
            value=self.init_url,
            align="end",
            sizing_mode="stretch_width",
            width_policy="max",
        )
        self.protocol = pn.widgets.Select(
            options=sorted(known_implementations),
            value=self.init_protocol,
            name="protocol",
            align="center",
        )
        self.kwargs = pn.widgets.TextInput(
            name="kwargs", value=self.init_kwargs, align="center"
        )
        self.go = pn.widgets.Button(name="⇨", align="end", width=45)
        self.main = SingleSelect(size=10)
        self.home = pn.widgets.Button(name="🏠", width=40, height=30, align="end")
        self.up = pn.widgets.Button(name="‹", width=30, height=30, align="end")

        self._register(self.protocol, "protocol_changed", auto=True)
        self._register(self.go, "go_clicked", "clicks", auto=True)
        self._register(self.up, "up_clicked", "clicks", auto=True)
        self._register(self.home, "home_clicked", "clicks", auto=True)
        self._register(None, "selection_changed")
        self.main.connect("selected", self.selection_changed)
        self._register(None, "directory_entered")
        self.prev_protocol = self.protocol.value
        self.prev_kwargs = self.storage_options

        self.filter_sel = pn.widgets.CheckBoxGroup(
            value=[], options=[], inline=False, align="end", width_policy="min"
        )
        self._register(self.filter_sel, "filters_changed", auto=True)

        self.panel = pn.Column(
            pn.Row(self.protocol, self.kwargs),
            pn.Row(self.home, self.up, self.url, self.go, self.filter_sel),
            self.main.panel,
        )
        self.set_filters(self.filters)
        self.go_clicked()

    def set_filters(self, filters=None):
        self.filters = filters
        if filters:
            self.filter_sel.options = filters
            self.filter_sel.value = filters
        else:
            self.filter_sel.options = []
            self.filter_sel.value = []

    @property
    def storage_options(self):
        """Value of the kwargs box as a dictionary"""
        return ast.literal_eval(self.kwargs.value) or {}

    @property
    def fs(self):
        """Current filesystem instance"""
        if self._fs is None:
            cls = get_filesystem_class(self.protocol.value)
            self._fs = cls(**self.storage_options)
        return self._fs

    @property
    def urlpath(self):
        """URL of currently selected item"""
        return (
            (f"{self.protocol.value}://{self.main.value[0]}")
            if self.main.value
            else None
        )

    def open_file(self, mode="rb", compression=None, encoding=None):
        """Create OpenFile instance for the currently selected item

        For example, in a notebook you might do something like

        .. code-block::

            [ ]: sel = FileSelector(); sel

            # user selects their file

            [ ]: with sel.open_file('rb') as f:
            ...      out = f.read()

        Parameters
        ----------
        mode: str (optional)
            Open mode for the file.
        compression: str (optional)
            If given, interact with the file as compressed. Set to 'infer' to guess
            compression from the file ending
        encoding: str (optional)
            If using text mode, use this encoding; defaults to UTF8.
        """
        if self.urlpath is None:
            raise ValueError("No file selected")
        return OpenFile(self.fs, self.urlpath, mode, compression, encoding)

    def filters_changed(self, values):
        self.filters = values
        self.go_clicked()

    def selection_changed(self, *_):
        if self.urlpath is None:
            return
        if self.fs.isdir(self.urlpath):
            self.url.value = self.fs._strip_protocol(self.urlpath)
            self.go_clicked()

    def go_clicked(self, *_):
        if (
            self.prev_protocol != self.protocol.value
            or self.prev_kwargs != self.storage_options
        ):
            self._fs = None  # causes fs to be recreated
            self.prev_protocol = self.protocol.value
            self.prev_kwargs = self.storage_options
        listing = sorted(
            self.fs.ls(self.url.value, detail=True), key=lambda x: x["name"]
        )
        listing = [
            l
            for l in listing
            if not any(i.match(l["name"].rsplit("/", 1)[-1]) for i in self.ignore)
        ]
        folders = {
            "📁 " + o["name"].rsplit("/", 1)[-1]: o["name"]
            for o in listing
            if o["type"] == "directory"
        }
        files = {
            "📄 " + o["name"].rsplit("/", 1)[-1]: o["name"]
            for o in listing
            if o["type"] == "file"
        }
        if self.filters:
            files = {
                k: v
                for k, v in files.items()
                if any(v.endswith(ext) for ext in self.filters)
            }
        self.main.set_options(dict(**folders, **files))

    def protocol_changed(self, *_):
        self._fs = None
        self.main.options = []
        self.url.value = ""

    def home_clicked(self, *_):
        self.protocol.value = self.init_protocol
        self.kwargs.value = self.init_kwargs
        self.url.value = self.init_url
        self.go_clicked()

    def up_clicked(self, *_):
        self.url.value = self.fs._parent(self.url.value)
        self.go_clicked()
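Usage note (not part of the vendored file): a minimal sketch of the selector widget above, assuming panel is installed and a Jupyter or Panel-server context; the starting URL and filter endings are illustrative only.

# Sketch: browse local files and read the chosen one. Illustrative only.
from fsspec.gui import FileSelector

sel = FileSelector("file:///tmp", filters=[".csv", ".txt"])
sel.show()  # opens a browser tab; in a notebook, `sel` as cell output works too

# After a file has been picked in the interface:
# with sel.open_file("rb") as f:
#     data = f.read()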