feat(fetch-listing): poc recursive fetching
This commit is contained in:
parent
6df68a7e9c
commit
cfff120c9b
5 changed files with 1370 additions and 133 deletions
290
Cargo.lock
generated
290
Cargo.lock
generated
|
@ -342,6 +342,12 @@ version = "3.17.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
|
checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "byteorder"
|
||||||
|
version = "1.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bytes"
|
name = "bytes"
|
||||||
version = "1.10.1"
|
version = "1.10.1"
|
||||||
|
@ -512,6 +518,29 @@ dependencies = [
|
||||||
"typenum",
|
"typenum",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cssparser"
|
||||||
|
version = "0.34.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3"
|
||||||
|
dependencies = [
|
||||||
|
"cssparser-macros",
|
||||||
|
"dtoa-short",
|
||||||
|
"itoa",
|
||||||
|
"phf",
|
||||||
|
"smallvec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cssparser-macros"
|
||||||
|
version = "0.6.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
|
||||||
|
dependencies = [
|
||||||
|
"quote",
|
||||||
|
"syn 2.0.100",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "darling"
|
name = "darling"
|
||||||
version = "0.20.11"
|
version = "0.20.11"
|
||||||
|
@ -563,6 +592,17 @@ dependencies = [
|
||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "derive_more"
|
||||||
|
version = "0.99.19"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3da29a38df43d6f156149c9b43ded5e018ddff2a855cf2cfd62e8cd7d079c69f"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 2.0.100",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "digest"
|
name = "digest"
|
||||||
version = "0.10.7"
|
version = "0.10.7"
|
||||||
|
@ -591,6 +631,27 @@ version = "0.3.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
|
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dtoa"
|
||||||
|
version = "1.0.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dtoa-short"
|
||||||
|
version = "0.3.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
|
||||||
|
dependencies = [
|
||||||
|
"dtoa",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ego-tree"
|
||||||
|
version = "0.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "either"
|
name = "either"
|
||||||
version = "1.15.0"
|
version = "1.15.0"
|
||||||
|
@ -727,6 +788,16 @@ dependencies = [
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futf"
|
||||||
|
version = "0.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
|
||||||
|
dependencies = [
|
||||||
|
"mac",
|
||||||
|
"new_debug_unreachable",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures"
|
name = "futures"
|
||||||
version = "0.3.31"
|
version = "0.3.31"
|
||||||
|
@ -816,6 +887,15 @@ dependencies = [
|
||||||
"slab",
|
"slab",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fxhash"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
|
||||||
|
dependencies = [
|
||||||
|
"byteorder",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "gcp_auth"
|
name = "gcp_auth"
|
||||||
version = "0.12.3"
|
version = "0.12.3"
|
||||||
|
@ -853,6 +933,15 @@ dependencies = [
|
||||||
"version_check",
|
"version_check",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "getopts"
|
||||||
|
version = "0.2.21"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-width 0.1.14",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "getrandom"
|
name = "getrandom"
|
||||||
version = "0.2.15"
|
version = "0.2.15"
|
||||||
|
@ -958,6 +1047,18 @@ dependencies = [
|
||||||
"windows-sys 0.59.0",
|
"windows-sys 0.59.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "html5ever"
|
||||||
|
version = "0.29.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"mac",
|
||||||
|
"markup5ever",
|
||||||
|
"match_token",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "http"
|
name = "http"
|
||||||
version = "1.3.1"
|
version = "1.3.1"
|
||||||
|
@ -1429,6 +1530,37 @@ version = "0.4.27"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
|
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mac"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "markup5ever"
|
||||||
|
version = "0.14.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c7a7213d12e1864c0f002f52c2923d4556935a43dec5e71355c2760e0f6e7a18"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"phf",
|
||||||
|
"phf_codegen",
|
||||||
|
"string_cache",
|
||||||
|
"string_cache_codegen",
|
||||||
|
"tendril",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "match_token"
|
||||||
|
version = "0.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 2.0.100",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "matchers"
|
name = "matchers"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
@ -1509,6 +1641,12 @@ dependencies = [
|
||||||
"tempfile",
|
"tempfile",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "new_debug_unreachable"
|
||||||
|
version = "1.0.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nu-ansi-term"
|
name = "nu-ansi-term"
|
||||||
version = "0.46.0"
|
version = "0.46.0"
|
||||||
|
@ -1685,6 +1823,58 @@ dependencies = [
|
||||||
"indexmap 2.9.0",
|
"indexmap 2.9.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "phf"
|
||||||
|
version = "0.11.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
|
||||||
|
dependencies = [
|
||||||
|
"phf_macros",
|
||||||
|
"phf_shared",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "phf_codegen"
|
||||||
|
version = "0.11.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
|
||||||
|
dependencies = [
|
||||||
|
"phf_generator",
|
||||||
|
"phf_shared",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "phf_generator"
|
||||||
|
version = "0.11.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
|
||||||
|
dependencies = [
|
||||||
|
"phf_shared",
|
||||||
|
"rand 0.8.5",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "phf_macros"
|
||||||
|
version = "0.11.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
|
||||||
|
dependencies = [
|
||||||
|
"phf_generator",
|
||||||
|
"phf_shared",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 2.0.100",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "phf_shared"
|
||||||
|
version = "0.11.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
|
||||||
|
dependencies = [
|
||||||
|
"siphasher",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pin-project"
|
name = "pin-project"
|
||||||
version = "1.1.10"
|
version = "1.1.10"
|
||||||
|
@ -1744,6 +1934,12 @@ dependencies = [
|
||||||
"zerocopy",
|
"zerocopy",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "precomputed-hash"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "prettyplease"
|
name = "prettyplease"
|
||||||
version = "0.2.32"
|
version = "0.2.32"
|
||||||
|
@ -2300,6 +2496,21 @@ version = "1.2.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "scraper"
|
||||||
|
version = "0.23.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "527e65d9d888567588db4c12da1087598d0f6f8b346cc2c5abc91f05fc2dffe2"
|
||||||
|
dependencies = [
|
||||||
|
"cssparser",
|
||||||
|
"ego-tree",
|
||||||
|
"getopts",
|
||||||
|
"html5ever",
|
||||||
|
"precomputed-hash",
|
||||||
|
"selectors",
|
||||||
|
"tendril",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "security-framework"
|
name = "security-framework"
|
||||||
version = "2.11.1"
|
version = "2.11.1"
|
||||||
|
@ -2336,6 +2547,25 @@ dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "selectors"
|
||||||
|
version = "0.26.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags 2.9.0",
|
||||||
|
"cssparser",
|
||||||
|
"derive_more",
|
||||||
|
"fxhash",
|
||||||
|
"log",
|
||||||
|
"new_debug_unreachable",
|
||||||
|
"phf",
|
||||||
|
"phf_codegen",
|
||||||
|
"precomputed-hash",
|
||||||
|
"servo_arc",
|
||||||
|
"smallvec",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde"
|
name = "serde"
|
||||||
version = "1.0.219"
|
version = "1.0.219"
|
||||||
|
@ -2431,6 +2661,15 @@ dependencies = [
|
||||||
"syn 2.0.100",
|
"syn 2.0.100",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "servo_arc"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ae65c4249478a2647db249fb43e23cec56a2c8974a427e7bd8cb5a1d0964921a"
|
||||||
|
dependencies = [
|
||||||
|
"stable_deref_trait",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sharded-slab"
|
name = "sharded-slab"
|
||||||
version = "0.1.7"
|
version = "0.1.7"
|
||||||
|
@ -2455,8 +2694,11 @@ dependencies = [
|
||||||
"futures",
|
"futures",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"rusqlite",
|
"rusqlite",
|
||||||
|
"scraper",
|
||||||
|
"serde_json",
|
||||||
"snix-castore",
|
"snix-castore",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
"tokio-stream",
|
||||||
"tokio-util",
|
"tokio-util",
|
||||||
"url",
|
"url",
|
||||||
]
|
]
|
||||||
|
@ -2470,6 +2712,12 @@ dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "siphasher"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "slab"
|
name = "slab"
|
||||||
version = "0.4.9"
|
version = "0.4.9"
|
||||||
|
@ -2588,6 +2836,31 @@ version = "1.2.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "string_cache"
|
||||||
|
version = "0.8.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
|
||||||
|
dependencies = [
|
||||||
|
"new_debug_unreachable",
|
||||||
|
"parking_lot",
|
||||||
|
"phf_shared",
|
||||||
|
"precomputed-hash",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "string_cache_codegen"
|
||||||
|
version = "0.5.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
|
||||||
|
dependencies = [
|
||||||
|
"phf_generator",
|
||||||
|
"phf_shared",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "strsim"
|
name = "strsim"
|
||||||
version = "0.11.1"
|
version = "0.11.1"
|
||||||
|
@ -2676,6 +2949,17 @@ dependencies = [
|
||||||
"windows-sys 0.59.0",
|
"windows-sys 0.59.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tendril"
|
||||||
|
version = "0.4.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
|
||||||
|
dependencies = [
|
||||||
|
"futf",
|
||||||
|
"mac",
|
||||||
|
"utf-8",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "thiserror"
|
name = "thiserror"
|
||||||
version = "1.0.69"
|
version = "1.0.69"
|
||||||
|
@ -3129,6 +3413,12 @@ dependencies = [
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "utf-8"
|
||||||
|
version = "0.7.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "utf16_iter"
|
name = "utf16_iter"
|
||||||
version = "1.0.5"
|
version = "1.0.5"
|
||||||
|
|
743
Cargo.nix
743
Cargo.nix
|
@ -1104,6 +1104,19 @@ rec {
|
||||||
};
|
};
|
||||||
resolvedDefaultFeatures = [ "default" ];
|
resolvedDefaultFeatures = [ "default" ];
|
||||||
};
|
};
|
||||||
|
"byteorder" = rec {
|
||||||
|
crateName = "byteorder";
|
||||||
|
version = "1.5.0";
|
||||||
|
edition = "2021";
|
||||||
|
sha256 = "0jzncxyf404mwqdbspihyzpkndfgda450l0893pz5xj685cg5l0z";
|
||||||
|
authors = [
|
||||||
|
"Andrew Gallant <jamslam@gmail.com>"
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"default" = [ "std" ];
|
||||||
|
};
|
||||||
|
resolvedDefaultFeatures = [ "default" "std" ];
|
||||||
|
};
|
||||||
"bytes" = rec {
|
"bytes" = rec {
|
||||||
crateName = "bytes";
|
crateName = "bytes";
|
||||||
version = "1.10.1";
|
version = "1.10.1";
|
||||||
|
@ -1588,6 +1601,65 @@ rec {
|
||||||
"rand_core" = [ "dep:rand_core" ];
|
"rand_core" = [ "dep:rand_core" ];
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
"cssparser" = rec {
|
||||||
|
crateName = "cssparser";
|
||||||
|
version = "0.34.0";
|
||||||
|
edition = "2018";
|
||||||
|
sha256 = "1qx3hha392szcl812l6hp0d4029gg8x62cl4nf0byqgdv0f6vimp";
|
||||||
|
authors = [
|
||||||
|
"Simon Sapin <simon.sapin@exyr.org>"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "cssparser-macros";
|
||||||
|
packageId = "cssparser-macros";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "dtoa-short";
|
||||||
|
packageId = "dtoa-short";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "itoa";
|
||||||
|
packageId = "itoa";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "phf";
|
||||||
|
packageId = "phf";
|
||||||
|
features = [ "macros" ];
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "smallvec";
|
||||||
|
packageId = "smallvec";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"serde" = [ "dep:serde" ];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
"cssparser-macros" = rec {
|
||||||
|
crateName = "cssparser-macros";
|
||||||
|
version = "0.6.1";
|
||||||
|
edition = "2018";
|
||||||
|
sha256 = "0cfkzj60avrnskdmaf7f8zw6pp3di4ylplk455zrzaf19ax8id8k";
|
||||||
|
procMacro = true;
|
||||||
|
libName = "cssparser_macros";
|
||||||
|
libPath = "lib.rs";
|
||||||
|
authors = [
|
||||||
|
"Simon Sapin <simon.sapin@exyr.org>"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "quote";
|
||||||
|
packageId = "quote";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "syn";
|
||||||
|
packageId = "syn 2.0.100";
|
||||||
|
features = [ "full" "extra-traits" ];
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
};
|
||||||
"darling" = rec {
|
"darling" = rec {
|
||||||
crateName = "darling";
|
crateName = "darling";
|
||||||
version = "0.20.11";
|
version = "0.20.11";
|
||||||
|
@ -1731,6 +1803,49 @@ rec {
|
||||||
};
|
};
|
||||||
resolvedDefaultFeatures = [ "alloc" "powerfmt" "serde" "std" ];
|
resolvedDefaultFeatures = [ "alloc" "powerfmt" "serde" "std" ];
|
||||||
};
|
};
|
||||||
|
"derive_more" = rec {
|
||||||
|
crateName = "derive_more";
|
||||||
|
version = "0.99.19";
|
||||||
|
edition = "2018";
|
||||||
|
sha256 = "17y6g78dg31fsv7z4p455bzxs670spg476ww2ibg3mj3vww9m8ix";
|
||||||
|
procMacro = true;
|
||||||
|
authors = [
|
||||||
|
"Jelte Fennema <github-tech@jeltef.nl>"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "proc-macro2";
|
||||||
|
packageId = "proc-macro2";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "quote";
|
||||||
|
packageId = "quote";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "syn";
|
||||||
|
packageId = "syn 2.0.100";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"convert_case" = [ "dep:convert_case" ];
|
||||||
|
"default" = [ "add_assign" "add" "as_mut" "as_ref" "constructor" "deref" "deref_mut" "display" "error" "from" "from_str" "index" "index_mut" "into" "into_iterator" "iterator" "mul_assign" "mul" "not" "sum" "try_into" "is_variant" "unwrap" ];
|
||||||
|
"display" = [ "syn/extra-traits" ];
|
||||||
|
"error" = [ "syn/extra-traits" ];
|
||||||
|
"from" = [ "syn/extra-traits" ];
|
||||||
|
"generate-parsing-rs" = [ "peg" ];
|
||||||
|
"into" = [ "syn/extra-traits" ];
|
||||||
|
"is_variant" = [ "convert_case" ];
|
||||||
|
"mul" = [ "syn/extra-traits" ];
|
||||||
|
"mul_assign" = [ "syn/extra-traits" ];
|
||||||
|
"not" = [ "syn/extra-traits" ];
|
||||||
|
"peg" = [ "dep:peg" ];
|
||||||
|
"rustc_version" = [ "dep:rustc_version" ];
|
||||||
|
"testing-helpers" = [ "rustc_version" ];
|
||||||
|
"try_into" = [ "syn/extra-traits" ];
|
||||||
|
"unwrap" = [ "convert_case" "rustc_version" ];
|
||||||
|
};
|
||||||
|
resolvedDefaultFeatures = [ "add" "add_assign" ];
|
||||||
|
};
|
||||||
"digest" = rec {
|
"digest" = rec {
|
||||||
crateName = "digest";
|
crateName = "digest";
|
||||||
version = "0.10.7";
|
version = "0.10.7";
|
||||||
|
@ -1810,6 +1925,49 @@ rec {
|
||||||
features = {
|
features = {
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
"dtoa" = rec {
|
||||||
|
crateName = "dtoa";
|
||||||
|
version = "1.0.10";
|
||||||
|
edition = "2018";
|
||||||
|
sha256 = "016gid01rarcdv57h049d7nr9daxc2hc2gqzx0mji57krywd7bfn";
|
||||||
|
authors = [
|
||||||
|
"David Tolnay <dtolnay@gmail.com>"
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"no-panic" = [ "dep:no-panic" ];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
"dtoa-short" = rec {
|
||||||
|
crateName = "dtoa-short";
|
||||||
|
version = "0.3.5";
|
||||||
|
edition = "2015";
|
||||||
|
sha256 = "11rwnkgql5jilsmwxpx6hjzkgyrbdmx1d71s0jyrjqm5nski25fd";
|
||||||
|
libName = "dtoa_short";
|
||||||
|
authors = [
|
||||||
|
"Xidorn Quan <me@upsuper.org>"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "dtoa";
|
||||||
|
packageId = "dtoa";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
};
|
||||||
|
"ego-tree" = rec {
|
||||||
|
crateName = "ego-tree";
|
||||||
|
version = "0.10.0";
|
||||||
|
edition = "2021";
|
||||||
|
sha256 = "1n2csy99chk5v5vzjl0ff79vxpxhl76xmcb3aj6brrzzipmjz5xj";
|
||||||
|
libName = "ego_tree";
|
||||||
|
authors = [
|
||||||
|
"June McEnroe <june@causal.agency>"
|
||||||
|
"Carlo Federico Vescovo <vescovocarlofederico@gmail.com>"
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"serde" = [ "dep:serde" ];
|
||||||
|
};
|
||||||
|
};
|
||||||
"either" = rec {
|
"either" = rec {
|
||||||
crateName = "either";
|
crateName = "either";
|
||||||
version = "1.15.0";
|
version = "1.15.0";
|
||||||
|
@ -2155,6 +2313,26 @@ rec {
|
||||||
};
|
};
|
||||||
resolvedDefaultFeatures = [ "alloc" "default" "std" ];
|
resolvedDefaultFeatures = [ "alloc" "default" "std" ];
|
||||||
};
|
};
|
||||||
|
"futf" = rec {
|
||||||
|
crateName = "futf";
|
||||||
|
version = "0.1.5";
|
||||||
|
edition = "2015";
|
||||||
|
sha256 = "0hvqk2r7v4fnc34hvc3vkri89gn52d5m9ihygmwn75l1hhp0whnz";
|
||||||
|
authors = [
|
||||||
|
"Keegan McAllister <kmcallister@mozilla.com>"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "mac";
|
||||||
|
packageId = "mac";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "new_debug_unreachable";
|
||||||
|
packageId = "new_debug_unreachable";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
};
|
||||||
"futures" = rec {
|
"futures" = rec {
|
||||||
crateName = "futures";
|
crateName = "futures";
|
||||||
version = "0.3.31";
|
version = "0.3.31";
|
||||||
|
@ -2433,6 +2611,23 @@ rec {
|
||||||
};
|
};
|
||||||
resolvedDefaultFeatures = [ "alloc" "async-await" "async-await-macro" "channel" "default" "futures-channel" "futures-io" "futures-macro" "futures-sink" "io" "memchr" "sink" "slab" "std" ];
|
resolvedDefaultFeatures = [ "alloc" "async-await" "async-await-macro" "channel" "default" "futures-channel" "futures-io" "futures-macro" "futures-sink" "io" "memchr" "sink" "slab" "std" ];
|
||||||
};
|
};
|
||||||
|
"fxhash" = rec {
|
||||||
|
crateName = "fxhash";
|
||||||
|
version = "0.2.1";
|
||||||
|
edition = "2015";
|
||||||
|
sha256 = "037mb9ichariqi45xm6mz0b11pa92gj38ba0409z3iz239sns6y3";
|
||||||
|
libPath = "lib.rs";
|
||||||
|
authors = [
|
||||||
|
"cbreeden <github@u.breeden.cc>"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "byteorder";
|
||||||
|
packageId = "byteorder";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
};
|
||||||
"gcp_auth" = rec {
|
"gcp_auth" = rec {
|
||||||
crateName = "gcp_auth";
|
crateName = "gcp_auth";
|
||||||
version = "0.12.3";
|
version = "0.12.3";
|
||||||
|
@ -2565,6 +2760,26 @@ rec {
|
||||||
};
|
};
|
||||||
resolvedDefaultFeatures = [ "more_lengths" ];
|
resolvedDefaultFeatures = [ "more_lengths" ];
|
||||||
};
|
};
|
||||||
|
"getopts" = rec {
|
||||||
|
crateName = "getopts";
|
||||||
|
version = "0.2.21";
|
||||||
|
edition = "2015";
|
||||||
|
sha256 = "1mgb3qvivi26gs6ihqqhh8iyhp3vgxri6vwyrwg28w0xqzavznql";
|
||||||
|
authors = [
|
||||||
|
"The Rust Project Developers"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "unicode-width";
|
||||||
|
packageId = "unicode-width 0.1.14";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"core" = [ "dep:core" ];
|
||||||
|
"rustc-dep-of-std" = [ "unicode-width/rustc-dep-of-std" "std" "core" ];
|
||||||
|
"std" = [ "dep:std" ];
|
||||||
|
};
|
||||||
|
};
|
||||||
"getrandom 0.2.15" = rec {
|
"getrandom 0.2.15" = rec {
|
||||||
crateName = "getrandom";
|
crateName = "getrandom";
|
||||||
version = "0.2.15";
|
version = "0.2.15";
|
||||||
|
@ -2938,6 +3153,35 @@ rec {
|
||||||
];
|
];
|
||||||
|
|
||||||
};
|
};
|
||||||
|
"html5ever" = rec {
|
||||||
|
crateName = "html5ever";
|
||||||
|
version = "0.29.1";
|
||||||
|
edition = "2021";
|
||||||
|
sha256 = "07518h5gbw0c6x7w5br76bgxvgphs6zlrb4q7ii7bg1ww7510x1v";
|
||||||
|
authors = [
|
||||||
|
"The html5ever Project Developers"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "log";
|
||||||
|
packageId = "log";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "mac";
|
||||||
|
packageId = "mac";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "markup5ever";
|
||||||
|
packageId = "markup5ever";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "match_token";
|
||||||
|
packageId = "match_token";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
};
|
||||||
|
};
|
||||||
"http" = rec {
|
"http" = rec {
|
||||||
crateName = "http";
|
crateName = "http";
|
||||||
version = "1.3.1";
|
version = "1.3.1";
|
||||||
|
@ -4471,6 +4715,78 @@ rec {
|
||||||
};
|
};
|
||||||
resolvedDefaultFeatures = [ "std" ];
|
resolvedDefaultFeatures = [ "std" ];
|
||||||
};
|
};
|
||||||
|
"mac" = rec {
|
||||||
|
crateName = "mac";
|
||||||
|
version = "0.1.1";
|
||||||
|
edition = "2015";
|
||||||
|
sha256 = "194vc7vrshqff72rl56f9xgb0cazyl4jda7qsv31m5l6xx7hq7n4";
|
||||||
|
authors = [
|
||||||
|
"Jonathan Reem <jonathan.reem@gmail.com>"
|
||||||
|
];
|
||||||
|
|
||||||
|
};
|
||||||
|
"markup5ever" = rec {
|
||||||
|
crateName = "markup5ever";
|
||||||
|
version = "0.14.1";
|
||||||
|
edition = "2021";
|
||||||
|
sha256 = "063sdq7hwxn2al9ygify8dd96mj57n9c4lig007lr1p128yj39y7";
|
||||||
|
libPath = "lib.rs";
|
||||||
|
authors = [
|
||||||
|
"The html5ever Project Developers"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "log";
|
||||||
|
packageId = "log";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "phf";
|
||||||
|
packageId = "phf";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "string_cache";
|
||||||
|
packageId = "string_cache";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "tendril";
|
||||||
|
packageId = "tendril";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
buildDependencies = [
|
||||||
|
{
|
||||||
|
name = "phf_codegen";
|
||||||
|
packageId = "phf_codegen";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "string_cache_codegen";
|
||||||
|
packageId = "string_cache_codegen";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
};
|
||||||
|
"match_token" = rec {
|
||||||
|
crateName = "match_token";
|
||||||
|
version = "0.1.0";
|
||||||
|
edition = "2021";
|
||||||
|
sha256 = "0sx3212vkjqfblfhr556ayabbjflbigjf5j591j9kgs4infniac8";
|
||||||
|
procMacro = true;
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "proc-macro2";
|
||||||
|
packageId = "proc-macro2";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "quote";
|
||||||
|
packageId = "quote";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "syn";
|
||||||
|
packageId = "syn 2.0.100";
|
||||||
|
features = [ "full" ];
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
};
|
||||||
"matchers" = rec {
|
"matchers" = rec {
|
||||||
crateName = "matchers";
|
crateName = "matchers";
|
||||||
version = "0.1.0";
|
version = "0.1.0";
|
||||||
|
@ -4717,6 +5033,18 @@ rec {
|
||||||
"vendored" = [ "openssl/vendored" ];
|
"vendored" = [ "openssl/vendored" ];
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
"new_debug_unreachable" = rec {
|
||||||
|
crateName = "new_debug_unreachable";
|
||||||
|
version = "1.0.6";
|
||||||
|
edition = "2021";
|
||||||
|
sha256 = "11phpf1mjxq6khk91yzcbd3ympm78m3ivl7xg6lg2c0lf66fy3k5";
|
||||||
|
libName = "debug_unreachable";
|
||||||
|
authors = [
|
||||||
|
"Matt Brubeck <mbrubeck@limpet.net>"
|
||||||
|
"Jonathan Reem <jonathan.reem@gmail.com>"
|
||||||
|
];
|
||||||
|
|
||||||
|
};
|
||||||
"nu-ansi-term" = rec {
|
"nu-ansi-term" = rec {
|
||||||
crateName = "nu-ansi-term";
|
crateName = "nu-ansi-term";
|
||||||
version = "0.46.0";
|
version = "0.46.0";
|
||||||
|
@ -5280,6 +5608,142 @@ rec {
|
||||||
"unstable" = [ "generate" ];
|
"unstable" = [ "generate" ];
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
"phf" = rec {
|
||||||
|
crateName = "phf";
|
||||||
|
version = "0.11.3";
|
||||||
|
edition = "2021";
|
||||||
|
sha256 = "0y6hxp1d48rx2434wgi5g8j1pr8s5jja29ha2b65435fh057imhz";
|
||||||
|
authors = [
|
||||||
|
"Steven Fackler <sfackler@gmail.com>"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "phf_macros";
|
||||||
|
packageId = "phf_macros";
|
||||||
|
optional = true;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "phf_shared";
|
||||||
|
packageId = "phf_shared";
|
||||||
|
usesDefaultFeatures = false;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"default" = [ "std" ];
|
||||||
|
"macros" = [ "phf_macros" ];
|
||||||
|
"phf_macros" = [ "dep:phf_macros" ];
|
||||||
|
"serde" = [ "dep:serde" ];
|
||||||
|
"std" = [ "phf_shared/std" ];
|
||||||
|
"uncased" = [ "phf_shared/uncased" ];
|
||||||
|
"unicase" = [ "phf_macros?/unicase" "phf_shared/unicase" ];
|
||||||
|
};
|
||||||
|
resolvedDefaultFeatures = [ "default" "macros" "phf_macros" "std" ];
|
||||||
|
};
|
||||||
|
"phf_codegen" = rec {
|
||||||
|
crateName = "phf_codegen";
|
||||||
|
version = "0.11.3";
|
||||||
|
edition = "2021";
|
||||||
|
sha256 = "0si1n6zr93kzjs3wah04ikw8z6npsr39jw4dam8yi9czg2609y5f";
|
||||||
|
authors = [
|
||||||
|
"Steven Fackler <sfackler@gmail.com>"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "phf_generator";
|
||||||
|
packageId = "phf_generator";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "phf_shared";
|
||||||
|
packageId = "phf_shared";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
};
|
||||||
|
"phf_generator" = rec {
|
||||||
|
crateName = "phf_generator";
|
||||||
|
version = "0.11.3";
|
||||||
|
edition = "2021";
|
||||||
|
crateBin = [];
|
||||||
|
sha256 = "0gc4np7s91ynrgw73s2i7iakhb4lzdv1gcyx7yhlc0n214a2701w";
|
||||||
|
authors = [
|
||||||
|
"Steven Fackler <sfackler@gmail.com>"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "phf_shared";
|
||||||
|
packageId = "phf_shared";
|
||||||
|
usesDefaultFeatures = false;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "rand";
|
||||||
|
packageId = "rand 0.8.5";
|
||||||
|
usesDefaultFeatures = false;
|
||||||
|
features = [ "small_rng" ];
|
||||||
|
}
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"criterion" = [ "dep:criterion" ];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
"phf_macros" = rec {
|
||||||
|
crateName = "phf_macros";
|
||||||
|
version = "0.11.3";
|
||||||
|
edition = "2021";
|
||||||
|
sha256 = "05kjfbyb439344rhmlzzw0f9bwk9fp95mmw56zs7yfn1552c0jpq";
|
||||||
|
procMacro = true;
|
||||||
|
authors = [
|
||||||
|
"Steven Fackler <sfackler@gmail.com>"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "phf_generator";
|
||||||
|
packageId = "phf_generator";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "phf_shared";
|
||||||
|
packageId = "phf_shared";
|
||||||
|
usesDefaultFeatures = false;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "proc-macro2";
|
||||||
|
packageId = "proc-macro2";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "quote";
|
||||||
|
packageId = "quote";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "syn";
|
||||||
|
packageId = "syn 2.0.100";
|
||||||
|
features = [ "full" ];
|
||||||
|
}
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"unicase" = [ "unicase_" "phf_shared/unicase" ];
|
||||||
|
"unicase_" = [ "dep:unicase_" ];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
"phf_shared" = rec {
|
||||||
|
crateName = "phf_shared";
|
||||||
|
version = "0.11.3";
|
||||||
|
edition = "2021";
|
||||||
|
sha256 = "1rallyvh28jqd9i916gk5gk2igdmzlgvv5q0l3xbf3m6y8pbrsk7";
|
||||||
|
authors = [
|
||||||
|
"Steven Fackler <sfackler@gmail.com>"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "siphasher";
|
||||||
|
packageId = "siphasher";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"default" = [ "std" ];
|
||||||
|
"uncased" = [ "dep:uncased" ];
|
||||||
|
"unicase" = [ "dep:unicase" ];
|
||||||
|
};
|
||||||
|
resolvedDefaultFeatures = [ "default" "std" ];
|
||||||
|
};
|
||||||
"pin-project" = rec {
|
"pin-project" = rec {
|
||||||
crateName = "pin-project";
|
crateName = "pin-project";
|
||||||
version = "1.1.10";
|
version = "1.1.10";
|
||||||
|
@ -5397,6 +5861,17 @@ rec {
|
||||||
};
|
};
|
||||||
resolvedDefaultFeatures = [ "simd" "std" ];
|
resolvedDefaultFeatures = [ "simd" "std" ];
|
||||||
};
|
};
|
||||||
|
"precomputed-hash" = rec {
|
||||||
|
crateName = "precomputed-hash";
|
||||||
|
version = "0.1.1";
|
||||||
|
edition = "2015";
|
||||||
|
sha256 = "075k9bfy39jhs53cb2fpb9klfakx2glxnf28zdw08ws6lgpq6lwj";
|
||||||
|
libName = "precomputed_hash";
|
||||||
|
authors = [
|
||||||
|
"Emilio Cobos Álvarez <emilio@crisal.io>"
|
||||||
|
];
|
||||||
|
|
||||||
|
};
|
||||||
"prettyplease" = rec {
|
"prettyplease" = rec {
|
||||||
crateName = "prettyplease";
|
crateName = "prettyplease";
|
||||||
version = "0.2.32";
|
version = "0.2.32";
|
||||||
|
@ -7485,6 +7960,56 @@ rec {
|
||||||
"default" = [ "use_std" ];
|
"default" = [ "use_std" ];
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
"scraper" = rec {
|
||||||
|
crateName = "scraper";
|
||||||
|
version = "0.23.1";
|
||||||
|
edition = "2021";
|
||||||
|
crateBin = [];
|
||||||
|
sha256 = "1qpz5py0a7y9mg2w4v1lidphz3arhw8dl4jcvf47aml8v3cnazjj";
|
||||||
|
authors = [
|
||||||
|
"June McEnroe <june@causal.agency>"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "cssparser";
|
||||||
|
packageId = "cssparser";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "ego-tree";
|
||||||
|
packageId = "ego-tree";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "getopts";
|
||||||
|
packageId = "getopts";
|
||||||
|
optional = true;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "html5ever";
|
||||||
|
packageId = "html5ever";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "precomputed-hash";
|
||||||
|
packageId = "precomputed-hash";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "selectors";
|
||||||
|
packageId = "selectors";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "tendril";
|
||||||
|
packageId = "tendril";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"default" = [ "main" "errors" ];
|
||||||
|
"deterministic" = [ "indexmap" ];
|
||||||
|
"getopts" = [ "dep:getopts" ];
|
||||||
|
"indexmap" = [ "dep:indexmap" ];
|
||||||
|
"main" = [ "getopts" ];
|
||||||
|
"serde" = [ "dep:serde" ];
|
||||||
|
};
|
||||||
|
resolvedDefaultFeatures = [ "default" "errors" "getopts" "main" ];
|
||||||
|
};
|
||||||
"security-framework 2.11.1" = rec {
|
"security-framework 2.11.1" = rec {
|
||||||
crateName = "security-framework";
|
crateName = "security-framework";
|
||||||
version = "2.11.1";
|
version = "2.11.1";
|
||||||
|
@ -7607,6 +8132,69 @@ rec {
|
||||||
};
|
};
|
||||||
resolvedDefaultFeatures = [ "OSX_10_10" "OSX_10_11" "OSX_10_12" "OSX_10_9" "default" ];
|
resolvedDefaultFeatures = [ "OSX_10_10" "OSX_10_11" "OSX_10_12" "OSX_10_9" "default" ];
|
||||||
};
|
};
|
||||||
|
"selectors" = rec {
|
||||||
|
crateName = "selectors";
|
||||||
|
version = "0.26.0";
|
||||||
|
edition = "2021";
|
||||||
|
sha256 = "1s3zv30rqgdvil7mnfr4xq5nb9m8yp0sai42l28y565mkd68lmpx";
|
||||||
|
libPath = "lib.rs";
|
||||||
|
authors = [
|
||||||
|
"The Servo Project Developers"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "bitflags";
|
||||||
|
packageId = "bitflags 2.9.0";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "cssparser";
|
||||||
|
packageId = "cssparser";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "derive_more";
|
||||||
|
packageId = "derive_more";
|
||||||
|
usesDefaultFeatures = false;
|
||||||
|
features = [ "add" "add_assign" ];
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "fxhash";
|
||||||
|
packageId = "fxhash";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "log";
|
||||||
|
packageId = "log";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "new_debug_unreachable";
|
||||||
|
packageId = "new_debug_unreachable";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "phf";
|
||||||
|
packageId = "phf";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "precomputed-hash";
|
||||||
|
packageId = "precomputed-hash";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "servo_arc";
|
||||||
|
packageId = "servo_arc";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "smallvec";
|
||||||
|
packageId = "smallvec";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
buildDependencies = [
|
||||||
|
{
|
||||||
|
name = "phf_codegen";
|
||||||
|
packageId = "phf_codegen";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"to_shmem" = [ "dep:to_shmem" "dep:to_shmem_derive" ];
|
||||||
|
};
|
||||||
|
};
|
||||||
"serde" = rec {
|
"serde" = rec {
|
||||||
crateName = "serde";
|
crateName = "serde";
|
||||||
version = "1.0.219";
|
version = "1.0.219";
|
||||||
|
@ -7951,6 +8539,26 @@ rec {
|
||||||
features = {
|
features = {
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
"servo_arc" = rec {
|
||||||
|
crateName = "servo_arc";
|
||||||
|
version = "0.4.0";
|
||||||
|
edition = "2021";
|
||||||
|
sha256 = "06ljch4isnnbv1xpwhjajz4a4mpc7ki47ys9n9yn98kqjhjc8rdf";
|
||||||
|
libPath = "lib.rs";
|
||||||
|
authors = [
|
||||||
|
"The Servo Project Developers"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "stable_deref_trait";
|
||||||
|
packageId = "stable_deref_trait";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"serde" = [ "dep:serde" ];
|
||||||
|
"servo" = [ "serde" "track_alloc_size" ];
|
||||||
|
};
|
||||||
|
};
|
||||||
"sharded-slab" = rec {
|
"sharded-slab" = rec {
|
||||||
crateName = "sharded-slab";
|
crateName = "sharded-slab";
|
||||||
version = "0.1.7";
|
version = "0.1.7";
|
||||||
|
@ -8021,6 +8629,14 @@ rec {
|
||||||
name = "rusqlite";
|
name = "rusqlite";
|
||||||
packageId = "rusqlite";
|
packageId = "rusqlite";
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
name = "scraper";
|
||||||
|
packageId = "scraper";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "serde_json";
|
||||||
|
packageId = "serde_json";
|
||||||
|
}
|
||||||
{
|
{
|
||||||
name = "snix-castore";
|
name = "snix-castore";
|
||||||
packageId = "snix-castore";
|
packageId = "snix-castore";
|
||||||
|
@ -8058,6 +8674,23 @@ rec {
|
||||||
];
|
];
|
||||||
|
|
||||||
};
|
};
|
||||||
|
"siphasher" = rec {
|
||||||
|
crateName = "siphasher";
|
||||||
|
version = "1.0.1";
|
||||||
|
edition = "2018";
|
||||||
|
sha256 = "17f35782ma3fn6sh21c027kjmd227xyrx06ffi8gw4xzv9yry6an";
|
||||||
|
authors = [
|
||||||
|
"Frank Denis <github@pureftpd.org>"
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"default" = [ "std" ];
|
||||||
|
"serde" = [ "dep:serde" ];
|
||||||
|
"serde_json" = [ "dep:serde_json" ];
|
||||||
|
"serde_no_std" = [ "serde/alloc" ];
|
||||||
|
"serde_std" = [ "std" "serde/std" ];
|
||||||
|
};
|
||||||
|
resolvedDefaultFeatures = [ "default" "std" ];
|
||||||
|
};
|
||||||
"slab" = rec {
|
"slab" = rec {
|
||||||
crateName = "slab";
|
crateName = "slab";
|
||||||
version = "0.4.9";
|
version = "0.4.9";
|
||||||
|
@ -8477,7 +9110,75 @@ rec {
|
||||||
"default" = [ "std" ];
|
"default" = [ "std" ];
|
||||||
"std" = [ "alloc" ];
|
"std" = [ "alloc" ];
|
||||||
};
|
};
|
||||||
resolvedDefaultFeatures = [ "alloc" ];
|
resolvedDefaultFeatures = [ "alloc" "default" "std" ];
|
||||||
|
};
|
||||||
|
"string_cache" = rec {
|
||||||
|
crateName = "string_cache";
|
||||||
|
version = "0.8.9";
|
||||||
|
edition = "2018";
|
||||||
|
sha256 = "03z7km2kzlwiv2r2qifq5riv4g8phazwng9wnvs3py3lzainnxxz";
|
||||||
|
authors = [
|
||||||
|
"The Servo Project Developers"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "new_debug_unreachable";
|
||||||
|
packageId = "new_debug_unreachable";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "parking_lot";
|
||||||
|
packageId = "parking_lot";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "phf_shared";
|
||||||
|
packageId = "phf_shared";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "precomputed-hash";
|
||||||
|
packageId = "precomputed-hash";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "serde";
|
||||||
|
packageId = "serde";
|
||||||
|
optional = true;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"default" = [ "serde_support" ];
|
||||||
|
"malloc_size_of" = [ "dep:malloc_size_of" ];
|
||||||
|
"serde" = [ "dep:serde" ];
|
||||||
|
"serde_support" = [ "serde" ];
|
||||||
|
};
|
||||||
|
resolvedDefaultFeatures = [ "default" "serde" "serde_support" ];
|
||||||
|
};
|
||||||
|
"string_cache_codegen" = rec {
|
||||||
|
crateName = "string_cache_codegen";
|
||||||
|
version = "0.5.4";
|
||||||
|
edition = "2018";
|
||||||
|
sha256 = "181ir4d6y053s1kka2idpjx5g9d9jgll6fy517jhzzpi2n3r44f7";
|
||||||
|
libPath = "lib.rs";
|
||||||
|
authors = [
|
||||||
|
"The Servo Project Developers"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "phf_generator";
|
||||||
|
packageId = "phf_generator";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "phf_shared";
|
||||||
|
packageId = "phf_shared";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "proc-macro2";
|
||||||
|
packageId = "proc-macro2";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "quote";
|
||||||
|
packageId = "quote";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
};
|
};
|
||||||
"strsim" = rec {
|
"strsim" = rec {
|
||||||
crateName = "strsim";
|
crateName = "strsim";
|
||||||
|
@ -8718,6 +9419,35 @@ rec {
|
||||||
};
|
};
|
||||||
resolvedDefaultFeatures = [ "default" "getrandom" ];
|
resolvedDefaultFeatures = [ "default" "getrandom" ];
|
||||||
};
|
};
|
||||||
|
"tendril" = rec {
|
||||||
|
crateName = "tendril";
|
||||||
|
version = "0.4.3";
|
||||||
|
edition = "2015";
|
||||||
|
sha256 = "1c3vip59sqwxn148i714nmkrvjzbk7105vj0h92s6r64bw614jnj";
|
||||||
|
authors = [
|
||||||
|
"Keegan McAllister <mcallister.keegan@gmail.com>"
|
||||||
|
"Simon Sapin <simon.sapin@exyr.org>"
|
||||||
|
"Chris Morgan <me@chrismorgan.info>"
|
||||||
|
];
|
||||||
|
dependencies = [
|
||||||
|
{
|
||||||
|
name = "futf";
|
||||||
|
packageId = "futf";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "mac";
|
||||||
|
packageId = "mac";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "utf-8";
|
||||||
|
packageId = "utf-8";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
features = {
|
||||||
|
"encoding" = [ "dep:encoding" ];
|
||||||
|
"encoding_rs" = [ "dep:encoding_rs" ];
|
||||||
|
};
|
||||||
|
};
|
||||||
"thiserror 1.0.69" = rec {
|
"thiserror 1.0.69" = rec {
|
||||||
crateName = "thiserror";
|
crateName = "thiserror";
|
||||||
version = "1.0.69";
|
version = "1.0.69";
|
||||||
|
@ -10325,6 +11055,17 @@ rec {
|
||||||
};
|
};
|
||||||
resolvedDefaultFeatures = [ "default" "std" ];
|
resolvedDefaultFeatures = [ "default" "std" ];
|
||||||
};
|
};
|
||||||
|
"utf-8" = rec {
|
||||||
|
crateName = "utf-8";
|
||||||
|
version = "0.7.6";
|
||||||
|
edition = "2015";
|
||||||
|
sha256 = "1a9ns3fvgird0snjkd3wbdhwd3zdpc2h5gpyybrfr6ra5pkqxk09";
|
||||||
|
libName = "utf8";
|
||||||
|
authors = [
|
||||||
|
"Simon Sapin <simon.sapin@exyr.org>"
|
||||||
|
];
|
||||||
|
|
||||||
|
};
|
||||||
"utf16_iter" = rec {
|
"utf16_iter" = rec {
|
||||||
crateName = "utf16_iter";
|
crateName = "utf16_iter";
|
||||||
version = "1.0.5";
|
version = "1.0.5";
|
||||||
|
|
|
@ -9,7 +9,10 @@ clap = "4.5.35"
|
||||||
futures = "0.3.31"
|
futures = "0.3.31"
|
||||||
reqwest = "0.12.15"
|
reqwest = "0.12.15"
|
||||||
rusqlite = "0.34.0"
|
rusqlite = "0.34.0"
|
||||||
|
scraper = "0.23.1"
|
||||||
|
serde_json = "1.0.140"
|
||||||
snix-castore = { version = "0.1.0", git = "https://git.snix.dev/snix/snix.git" }
|
snix-castore = { version = "0.1.0", git = "https://git.snix.dev/snix/snix.git" }
|
||||||
tokio = "1.44.2"
|
tokio = "1.44.2"
|
||||||
|
tokio-stream = "0.1.17"
|
||||||
tokio-util = "0.7.14"
|
tokio-util = "0.7.14"
|
||||||
url = "2.5.4"
|
url = "2.5.4"
|
||||||
|
|
|
@ -31,4 +31,8 @@ mkShell {
|
||||||
openssl
|
openssl
|
||||||
sqlite
|
sqlite
|
||||||
];
|
];
|
||||||
|
shellHook = ''
|
||||||
|
export DATABASE_PATH="$HOME/.local/share/sidx/sidx.db"
|
||||||
|
unset out outputs phases
|
||||||
|
'';
|
||||||
}
|
}
|
||||||
|
|
463
src/main.rs
463
src/main.rs
|
@ -1,12 +1,24 @@
|
||||||
|
use std::collections::HashSet;
|
||||||
use std::path::{absolute, PathBuf};
|
use std::path::{absolute, PathBuf};
|
||||||
|
use std::str::FromStr;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use anyhow::anyhow;
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
|
use anyhow::{anyhow, Error};
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use clap::Subcommand;
|
use clap::Subcommand;
|
||||||
use futures::{stream, StreamExt, TryStreamExt};
|
use futures::{stream, StreamExt, TryStreamExt};
|
||||||
use rusqlite::{params, OptionalExtension};
|
use rusqlite::{params, OptionalExtension};
|
||||||
|
use scraper::{Html, Selector};
|
||||||
|
use snix_castore::blobservice::BlobService;
|
||||||
|
use snix_castore::directoryservice::DirectoryService;
|
||||||
|
use snix_castore::B3Digest;
|
||||||
use snix_castore::{blobservice, directoryservice, import::fs::ingest_path};
|
use snix_castore::{blobservice, directoryservice, import::fs::ingest_path};
|
||||||
|
use std::sync::Mutex;
|
||||||
|
use tokio::io::{AsyncReadExt, BufReader};
|
||||||
|
use tokio::sync::mpsc::{channel, Sender};
|
||||||
|
use tokio::sync::Semaphore;
|
||||||
|
use tokio_stream::wrappers::ReceiverStream;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
|
@ -15,22 +27,28 @@ enum Ingestable {
|
||||||
Path(PathBuf),
|
Path(PathBuf),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Clone)]
|
||||||
enum IngestedWhen {
|
enum IngestedWhen {
|
||||||
Now,
|
Now,
|
||||||
Before,
|
Before,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Clone)]
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
struct Ingested {
|
struct Ingested {
|
||||||
sample_id: u32,
|
sample_id: u32,
|
||||||
uri: String,
|
uri: String,
|
||||||
blake3: String,
|
blake3: B3Digest,
|
||||||
epoch: u32,
|
epoch: u32,
|
||||||
when: IngestedWhen,
|
when: IngestedWhen,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
enum FetchListingMessage {
|
||||||
|
Ingested(Url, Ingested),
|
||||||
|
Recurse(Url, usize),
|
||||||
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for Ingestable {
|
impl std::fmt::Display for Ingestable {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
|
@ -45,7 +63,7 @@ impl std::fmt::Display for Ingestable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_url_or_path(s: &str) -> Result<Ingestable, anyhow::Error> {
|
fn parse_url_or_path(s: &str) -> Result<Ingestable, Error> {
|
||||||
if s.is_empty() {
|
if s.is_empty() {
|
||||||
Err(anyhow!("Empty path (url)"))
|
Err(anyhow!("Empty path (url)"))
|
||||||
} else if s.starts_with("./") || s.starts_with("/") {
|
} else if s.starts_with("./") || s.starts_with("/") {
|
||||||
|
@ -69,7 +87,7 @@ fn parse_url_or_path(s: &str) -> Result<Ingestable, anyhow::Error> {
|
||||||
fn data_path() -> PathBuf {
|
fn data_path() -> PathBuf {
|
||||||
let xdg_data_dir = std::env::var("XDG_DATA_DIR")
|
let xdg_data_dir = std::env::var("XDG_DATA_DIR")
|
||||||
.and_then(|s| Ok(PathBuf::from(s)))
|
.and_then(|s| Ok(PathBuf::from(s)))
|
||||||
.or_else(|_| -> Result<PathBuf, anyhow::Error> {
|
.or_else(|_| -> Result<PathBuf, Error> {
|
||||||
match std::env::home_dir() {
|
match std::env::home_dir() {
|
||||||
Some(p) => Ok(p.join(".local/share")),
|
Some(p) => Ok(p.join(".local/share")),
|
||||||
None => Err(anyhow!("...")), // FIXME
|
None => Err(anyhow!("...")), // FIXME
|
||||||
|
@ -93,6 +111,12 @@ enum Command {
|
||||||
#[clap(value_parser = parse_url_or_path, num_args = 1)]
|
#[clap(value_parser = parse_url_or_path, num_args = 1)]
|
||||||
inputs: Vec<Ingestable>,
|
inputs: Vec<Ingestable>,
|
||||||
},
|
},
|
||||||
|
FetchListing {
|
||||||
|
#[clap(value_parser, long, default_value_t = 5)]
|
||||||
|
max_depth: usize,
|
||||||
|
#[clap(value_parser, num_args = 1)]
|
||||||
|
inputs: Vec<Url>,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Parser)]
|
#[derive(Parser)]
|
||||||
|
@ -100,7 +124,7 @@ struct Cli {
|
||||||
#[clap(short, long, action)]
|
#[clap(short, long, action)]
|
||||||
refetch: bool,
|
refetch: bool,
|
||||||
|
|
||||||
#[clap(short, long, value_parser, default_value_t = 5)]
|
#[clap(short, long, value_parser, default_value_t = 4)]
|
||||||
max_parallel: usize,
|
max_parallel: usize,
|
||||||
|
|
||||||
#[clap(short, long, value_parser, default_value_os_t = default_db_path())]
|
#[clap(short, long, value_parser, default_value_os_t = default_db_path())]
|
||||||
|
@ -113,130 +137,33 @@ struct Cli {
|
||||||
command: Option<Command>,
|
command: Option<Command>,
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn ingest<BS, DS>(
|
struct SidxContext<BS, DS>
|
||||||
inputs: &Vec<Ingestable>,
|
where
|
||||||
|
BS: blobservice::BlobService + Clone + Send + 'static,
|
||||||
|
DS: directoryservice::DirectoryService + Clone + Send + 'static,
|
||||||
|
{
|
||||||
refetch: bool,
|
refetch: bool,
|
||||||
max_parallel: usize,
|
max_parallel: usize,
|
||||||
http_client: reqwest::Client,
|
http: reqwest::Client,
|
||||||
|
con: Arc<Mutex<rusqlite::Connection>>,
|
||||||
blob_service: BS,
|
blob_service: BS,
|
||||||
dir_service: DS,
|
dir_service: DS,
|
||||||
con: rusqlite::Connection,
|
|
||||||
) -> Vec<Result<Option<Ingested>, anyhow::Error>>
|
|
||||||
where
|
|
||||||
BS: blobservice::BlobService,
|
|
||||||
DS: directoryservice::DirectoryService,
|
|
||||||
{
|
|
||||||
let samples = stream::iter(inputs.iter().map(|uri| {
|
|
||||||
let client = &http_client;
|
|
||||||
let blob_service = &blob_service;
|
|
||||||
let dir_service = &dir_service;
|
|
||||||
let con = &con;
|
|
||||||
|
|
||||||
let mut find_sample = con
|
|
||||||
.prepare(include_str!("q/latest-download.sql"))
|
|
||||||
.expect("Failed to prepare latest-download.sql");
|
|
||||||
let mut add_sample = con
|
|
||||||
.prepare(include_str!("q/add-sample.sql"))
|
|
||||||
.expect("Failed to prepare add-sample.sql");
|
|
||||||
let mut add_blob = con
|
|
||||||
.prepare(include_str!("q/upsert-blob.sql"))
|
|
||||||
.expect("Failed to prepare upsert-blob.sql");
|
|
||||||
let mut add_uri = con
|
|
||||||
.prepare(include_str!("q/upsert-uri.sql"))
|
|
||||||
.expect("Failed to prepare upsert-uri.sql");
|
|
||||||
|
|
||||||
async move {
|
|
||||||
let uri_s = uri.to_string();
|
|
||||||
let latest_download = find_sample
|
|
||||||
.query_row(params![uri_s], |r| <(u32, String, u32)>::try_from(r))
|
|
||||||
.optional()?;
|
|
||||||
if let Some((sample_id, blake3, epoch)) = latest_download {
|
|
||||||
if !refetch {
|
|
||||||
return Ok::<Option<Ingested>, anyhow::Error>(Some(Ingested {
|
|
||||||
sample_id,
|
|
||||||
uri: uri_s,
|
|
||||||
blake3,
|
|
||||||
epoch,
|
|
||||||
when: IngestedWhen::Before,
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let (digest, n_bytes) = match uri {
|
|
||||||
Ingestable::Path(path) => {
|
|
||||||
match ingest_path::<_, _, _, &[u8]>(&blob_service, &dir_service, path, None)
|
|
||||||
.await?
|
|
||||||
{
|
|
||||||
snix_castore::Node::Directory { digest, size } => (digest, size),
|
|
||||||
snix_castore::Node::File {
|
|
||||||
digest,
|
|
||||||
size,
|
|
||||||
executable: _,
|
|
||||||
} => (digest, size),
|
|
||||||
snix_castore::Node::Symlink { target: _ } => {
|
|
||||||
return Err(anyhow!("TODO: Figure out what to do with symlink roots"))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ingestable::Url(url) => {
|
|
||||||
let res = client
|
|
||||||
.get(url.clone())
|
|
||||||
.send()
|
|
||||||
.await
|
|
||||||
.context(format!("Request.send failed early for {:?}", uri))?
|
|
||||||
.error_for_status()?;
|
|
||||||
let mut r = tokio_util::io::StreamReader::new(
|
|
||||||
res.bytes_stream().map_err(std::io::Error::other),
|
|
||||||
);
|
|
||||||
let mut w = blob_service.open_write().await;
|
|
||||||
let n_bytes = match tokio::io::copy(&mut r, &mut w).await {
|
|
||||||
Ok(n) => n,
|
|
||||||
Err(e) => {
|
|
||||||
return Err(anyhow!(
|
|
||||||
"tokio::io::copy failed for uri={} with {}",
|
|
||||||
uri_s,
|
|
||||||
e
|
|
||||||
));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let digest = w.close().await?;
|
|
||||||
(digest, n_bytes)
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let digest64 = format!("{}", digest);
|
|
||||||
add_blob.execute(params![digest64, n_bytes,])?;
|
|
||||||
add_uri.execute(params![uri_s])?;
|
|
||||||
let (sample_id, epoch) = add_sample
|
|
||||||
.query_row(params![uri_s, digest64], |row| <(u32, u32)>::try_from(row))?;
|
|
||||||
Ok(Some(Ingested {
|
|
||||||
sample_id,
|
|
||||||
uri: uri_s,
|
|
||||||
blake3: digest64,
|
|
||||||
epoch,
|
|
||||||
when: IngestedWhen::Now,
|
|
||||||
}))
|
|
||||||
}
|
|
||||||
}))
|
|
||||||
.buffer_unordered(max_parallel)
|
|
||||||
.collect::<Vec<Result<Option<Ingested>, _>>>()
|
|
||||||
.await;
|
|
||||||
|
|
||||||
samples
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
async fn open_context(
|
||||||
async fn main() {
|
refetch: bool,
|
||||||
let args = Cli::parse();
|
max_parallel: usize,
|
||||||
|
db_path: PathBuf,
|
||||||
args.db_path.parent().and_then(|p| {
|
castore_path: PathBuf,
|
||||||
|
) -> SidxContext<Arc<dyn BlobService>, Arc<dyn DirectoryService>> {
|
||||||
|
if let Some(p) = db_path.parent() {
|
||||||
let _ = std::fs::create_dir_all(p);
|
let _ = std::fs::create_dir_all(p);
|
||||||
Some(())
|
}
|
||||||
});
|
|
||||||
|
|
||||||
let con =
|
let con = rusqlite::Connection::open(&db_path).expect("Failed to construct Database object");
|
||||||
rusqlite::Connection::open(&args.db_path).expect("Failed to construct Database object");
|
|
||||||
con.execute_batch(include_str!("q/init.sql"))
|
con.execute_batch(include_str!("q/init.sql"))
|
||||||
.expect("Failed to execute init.sql");
|
.expect("Failed to execute init.sql");
|
||||||
let castore_path = absolute(args.castore_path).expect("Failed to canonicalize castore_path");
|
let castore_path = absolute(castore_path).expect("Failed to canonicalize castore_path");
|
||||||
let blob_service = blobservice::from_addr(&std::format!(
|
let blob_service = blobservice::from_addr(&std::format!(
|
||||||
"objectstore+file://{}",
|
"objectstore+file://{}",
|
||||||
castore_path
|
castore_path
|
||||||
|
@ -256,20 +183,279 @@ async fn main() {
|
||||||
.await
|
.await
|
||||||
.expect("Couldn't initialize .castore/directory");
|
.expect("Couldn't initialize .castore/directory");
|
||||||
|
|
||||||
let client = reqwest::Client::new();
|
SidxContext::<Arc<dyn BlobService>, Arc<dyn DirectoryService>> {
|
||||||
|
refetch,
|
||||||
|
max_parallel,
|
||||||
|
http: reqwest::Client::new(),
|
||||||
|
con: Arc::new(Mutex::new(con)),
|
||||||
|
blob_service,
|
||||||
|
dir_service,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<BS: BlobService + Clone, DS: DirectoryService + Clone> SidxContext<BS, DS> {
|
||||||
|
async fn db_latest_download(&self, uri: &str) -> Result<Option<Ingested>, Error> {
|
||||||
|
let lock = self.con.lock().unwrap();
|
||||||
|
let mut find_sample = lock
|
||||||
|
.prepare_cached(include_str!("q/latest-download.sql"))
|
||||||
|
.expect("Failed to prepare latest-download.sql");
|
||||||
|
find_sample
|
||||||
|
.query_row(params![uri], |r| <(u32, String, u32)>::try_from(r))
|
||||||
|
.optional()
|
||||||
|
.context("db_latest_download.sql")
|
||||||
|
.and_then(|maybe_triple| match maybe_triple {
|
||||||
|
Some((sample_id, blake3, epoch)) => Ok(Some(Ingested {
|
||||||
|
sample_id,
|
||||||
|
uri: uri.to_string(),
|
||||||
|
blake3: B3Digest::from_str(&blake3)?,
|
||||||
|
epoch,
|
||||||
|
when: IngestedWhen::Before,
|
||||||
|
})),
|
||||||
|
None => Ok(None),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
async fn db_add_sample(&self, uri: &str, blake3: &str) -> Result<(u32, u32), rusqlite::Error> {
|
||||||
|
let lock = self.con.lock().unwrap();
|
||||||
|
let mut add_sample = lock
|
||||||
|
.prepare_cached(include_str!("q/add-sample.sql"))
|
||||||
|
.expect("Failed to prepare add-sample.sql");
|
||||||
|
add_sample.query_row(params![uri, blake3], |row| <(u32, u32)>::try_from(row))
|
||||||
|
}
|
||||||
|
async fn db_add_blob(&self, blake3: &str, n_bytes: u64) -> Result<usize, rusqlite::Error> {
|
||||||
|
let lock = self.con.lock().unwrap();
|
||||||
|
let mut add_blob = lock
|
||||||
|
.prepare_cached(include_str!("q/upsert-blob.sql"))
|
||||||
|
.expect("Failed to prepare upsert-blob.sql");
|
||||||
|
add_blob.execute(params![blake3, n_bytes,])
|
||||||
|
}
|
||||||
|
async fn db_add_uri(&self, uri: &str) -> Result<usize, rusqlite::Error> {
|
||||||
|
let lock = self.con.lock().unwrap();
|
||||||
|
let mut add_uri = lock
|
||||||
|
.prepare_cached(include_str!("q/upsert-uri.sql"))
|
||||||
|
.expect("Failed to prepare upsert-uri.sql");
|
||||||
|
|
||||||
|
add_uri.execute(params![uri])
|
||||||
|
}
|
||||||
|
async fn record_ingested_node(
|
||||||
|
&self,
|
||||||
|
uri: &str,
|
||||||
|
blake3: &snix_castore::B3Digest,
|
||||||
|
n_bytes: u64,
|
||||||
|
) -> Result<Ingested, Error> {
|
||||||
|
let digest64 = format!("{}", blake3);
|
||||||
|
self.db_add_blob(&digest64, n_bytes).await?;
|
||||||
|
self.db_add_uri(&uri).await?;
|
||||||
|
let (sample_id, epoch) = self.db_add_sample(&uri, &digest64).await?;
|
||||||
|
Ok(Ingested {
|
||||||
|
sample_id,
|
||||||
|
uri: uri.to_string(),
|
||||||
|
blake3: blake3.clone(),
|
||||||
|
epoch,
|
||||||
|
when: IngestedWhen::Now,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
async fn download_no_cache(&self, uri: &Url) -> Result<Ingested, Error> {
|
||||||
|
let uri_s = uri.to_string();
|
||||||
|
let res = self
|
||||||
|
.http
|
||||||
|
.get(uri.clone())
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.context(format!("Request::send failed early for {:?}", uri))?
|
||||||
|
.error_for_status()?;
|
||||||
|
let mut r =
|
||||||
|
tokio_util::io::StreamReader::new(res.bytes_stream().map_err(std::io::Error::other));
|
||||||
|
let mut w = self.blob_service.open_write().await;
|
||||||
|
let n_bytes = match tokio::io::copy(&mut r, &mut w).await {
|
||||||
|
Ok(n) => n,
|
||||||
|
Err(e) => {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"tokio::io::copy failed for uri={} with {}",
|
||||||
|
uri_s,
|
||||||
|
e
|
||||||
|
));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let digest = w.close().await?;
|
||||||
|
self.record_ingested_node(&uri_s, &digest, n_bytes).await
|
||||||
|
}
|
||||||
|
async fn download(&self, uri: &Url) -> Result<Ingested, Error> {
|
||||||
|
if self.refetch {
|
||||||
|
self.download_no_cache(&uri).await
|
||||||
|
} else {
|
||||||
|
match self.db_latest_download(&uri.to_string()).await? {
|
||||||
|
Some(ingested) => Ok(ingested),
|
||||||
|
None => self.download_no_cache(&uri).await,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
async fn ingest(&self, inputs: &Vec<Ingestable>) -> Vec<Result<Option<Ingested>, Error>> {
|
||||||
|
let samples = stream::iter(inputs.iter().map(|uri| {
|
||||||
|
let blob_service = &self.blob_service;
|
||||||
|
let dir_service = &self.dir_service;
|
||||||
|
|
||||||
|
async move {
|
||||||
|
let uri_s = uri.to_string();
|
||||||
|
let latest_download = self.db_latest_download(&uri_s).await?;
|
||||||
|
if latest_download.is_some() {
|
||||||
|
return Ok(latest_download);
|
||||||
|
}
|
||||||
|
match uri {
|
||||||
|
Ingestable::Path(path) => {
|
||||||
|
match ingest_path::<_, _, _, &[u8]>(&blob_service, &dir_service, path, None)
|
||||||
|
.await?
|
||||||
|
{
|
||||||
|
snix_castore::Node::Directory { digest, size } => self
|
||||||
|
.record_ingested_node(&uri_s, &digest, size)
|
||||||
|
.await
|
||||||
|
.map(Some),
|
||||||
|
|
||||||
|
snix_castore::Node::File {
|
||||||
|
digest,
|
||||||
|
size,
|
||||||
|
executable: _,
|
||||||
|
} => self
|
||||||
|
.record_ingested_node(&uri_s, &digest, size)
|
||||||
|
.await
|
||||||
|
.map(Some),
|
||||||
|
snix_castore::Node::Symlink { target: _ } => {
|
||||||
|
Err(anyhow!("TODO: Figure out what to do with symlink roots"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ingestable::Url(url) => self.download(url).await.map(Some),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
.buffer_unordered(self.max_parallel)
|
||||||
|
.collect::<Vec<Result<Option<Ingested>, _>>>()
|
||||||
|
.await;
|
||||||
|
|
||||||
|
samples
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_hrefs(content: &str) -> Result<Vec<String>, Error> {
|
||||||
|
let sel = Selector::parse("a").map_err(|e| anyhow!(e.to_string()))?;
|
||||||
|
let html = Html::parse_document(&content);
|
||||||
|
|
||||||
|
Ok(html
|
||||||
|
.select(&sel)
|
||||||
|
.flat_map(|elt| elt.value().attr("href"))
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.collect::<Vec<_>>())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_from_listing_impl(
|
||||||
|
self: Arc<Self>,
|
||||||
|
url: Url,
|
||||||
|
max_depth: usize,
|
||||||
|
tx: Sender<FetchListingMessage>,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
eprintln!("Downloading {:?}", url.to_string());
|
||||||
|
let root = self.download(&url).await?;
|
||||||
|
tx.send(FetchListingMessage::Ingested(url.clone(), root.clone()))
|
||||||
|
.await
|
||||||
|
.context("Stopped accepting tasks before processing an Ingested notification")?;
|
||||||
|
if max_depth <= 0 {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
/* TODO: no need to load blobs to memory unless you know they're text/html */
|
||||||
|
match self.blob_service.open_read(&root.blake3).await? {
|
||||||
|
Some(mut reader) => {
|
||||||
|
let content = {
|
||||||
|
let mut br = BufReader::new(&mut *reader);
|
||||||
|
let mut content = String::new();
|
||||||
|
br.read_to_string(&mut content).await?;
|
||||||
|
content
|
||||||
|
};
|
||||||
|
let hrefs = Self::extract_hrefs(&content).unwrap_or(vec![]);
|
||||||
|
/* max_depth > 0 here */
|
||||||
|
for href in hrefs {
|
||||||
|
let next_url = url.join(&href).context("Constructing next_url")?;
|
||||||
|
tx.send(FetchListingMessage::Recurse(
|
||||||
|
next_url.clone(),
|
||||||
|
max_depth - 1,
|
||||||
|
))
|
||||||
|
.await
|
||||||
|
.context("Stopped accepting tasks before finishing all hrefs")?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
None => Err(anyhow!("Couldn't read the ingested blob")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_from_listing(
|
||||||
|
self: Arc<Self>,
|
||||||
|
url: Url,
|
||||||
|
max_depth: usize,
|
||||||
|
) -> ReceiverStream<Ingested> {
|
||||||
|
let mq_size = 10;
|
||||||
|
|
||||||
|
/* TODO: move task queue to e.g. sqlite */
|
||||||
|
let (tx, mut rx) = channel(mq_size);
|
||||||
|
|
||||||
|
let (out_tx, out_rx) = channel(mq_size);
|
||||||
|
|
||||||
|
let semaphore = Arc::new(Semaphore::new(self.max_parallel));
|
||||||
|
|
||||||
|
tokio::spawn({
|
||||||
|
async move {
|
||||||
|
let mut seen: HashSet<String> = HashSet::new();
|
||||||
|
tx.send(FetchListingMessage::Recurse(url, max_depth))
|
||||||
|
.await
|
||||||
|
.expect("fetch_from_listing failed populating the queue");
|
||||||
|
while let Some(m) = rx.recv().await {
|
||||||
|
match m {
|
||||||
|
FetchListingMessage::Ingested(_url, ingested) => {
|
||||||
|
out_tx
|
||||||
|
.send(ingested)
|
||||||
|
.await
|
||||||
|
.expect("ReceiverStream failed to accept an Ingestable");
|
||||||
|
}
|
||||||
|
FetchListingMessage::Recurse(url, max_depth) => {
|
||||||
|
if max_depth > 0 && !seen.contains(&url.to_string()) {
|
||||||
|
seen.insert(url.to_string());
|
||||||
|
tokio::spawn({
|
||||||
|
let s = self.clone();
|
||||||
|
let url = url.clone();
|
||||||
|
let tx = tx.clone();
|
||||||
|
let semaphore = semaphore.clone();
|
||||||
|
async move {
|
||||||
|
let _permit = semaphore.acquire();
|
||||||
|
s.fetch_from_listing_impl(url, max_depth, tx).await
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
ReceiverStream::new(out_rx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() {
|
||||||
|
let args = Cli::parse();
|
||||||
|
|
||||||
|
let _cwd = std::env::current_dir().expect("Couldn't get CWD");
|
||||||
|
let _host_name = std::env::var("HOSTNAME").map_or(None, Some);
|
||||||
|
|
||||||
|
let ctx = Arc::new(
|
||||||
|
open_context(
|
||||||
|
args.refetch,
|
||||||
|
args.max_parallel,
|
||||||
|
args.db_path,
|
||||||
|
args.castore_path,
|
||||||
|
)
|
||||||
|
.await,
|
||||||
|
);
|
||||||
|
|
||||||
match args.command {
|
match args.command {
|
||||||
Some(Command::Ingest { inputs }) => {
|
Some(Command::Ingest { inputs }) => {
|
||||||
let samples = ingest(
|
let samples = ctx.ingest(&inputs).await;
|
||||||
&inputs,
|
|
||||||
args.refetch,
|
|
||||||
args.max_parallel,
|
|
||||||
client,
|
|
||||||
blob_service,
|
|
||||||
dir_service,
|
|
||||||
con,
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
for s in samples {
|
for s in samples {
|
||||||
match s {
|
match s {
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
|
@ -282,6 +468,19 @@ async fn main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Some(Command::FetchListing { max_depth, inputs }) => {
|
||||||
|
let ingested: Vec<Ingested> = stream::iter(inputs)
|
||||||
|
.then(async |i| {
|
||||||
|
let i = i.clone();
|
||||||
|
ctx.clone().fetch_from_listing(i, max_depth).await
|
||||||
|
})
|
||||||
|
.flatten_unordered(args.max_parallel)
|
||||||
|
.collect()
|
||||||
|
.await;
|
||||||
|
for i in ingested {
|
||||||
|
eprintln!("{:?}", i);
|
||||||
|
}
|
||||||
|
}
|
||||||
None => {}
|
None => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue