diff --git a/Cargo.lock b/Cargo.lock index 107eccf..ee023c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -342,12 +342,6 @@ version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - [[package]] name = "bytes" version = "1.10.1" @@ -518,29 +512,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "cssparser" -version = "0.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3" -dependencies = [ - "cssparser-macros", - "dtoa-short", - "itoa", - "phf", - "smallvec", -] - -[[package]] -name = "cssparser-macros" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" -dependencies = [ - "quote", - "syn 2.0.100", -] - [[package]] name = "darling" version = "0.20.11" @@ -592,17 +563,6 @@ dependencies = [ "serde", ] -[[package]] -name = "derive_more" -version = "0.99.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3da29a38df43d6f156149c9b43ded5e018ddff2a855cf2cfd62e8cd7d079c69f" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.100", -] - [[package]] name = "digest" version = "0.10.7" @@ -631,27 +591,6 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" -[[package]] -name = "dtoa" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04" - -[[package]] -name = "dtoa-short" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" -dependencies = [ - "dtoa", -] - -[[package]] -name = "ego-tree" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8" - [[package]] name = "either" version = "1.15.0" @@ -788,16 +727,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "futf" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" -dependencies = [ - "mac", - "new_debug_unreachable", -] - [[package]] name = "futures" version = "0.3.31" @@ -887,15 +816,6 @@ dependencies = [ "slab", ] -[[package]] -name = "fxhash" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -dependencies = [ - "byteorder", -] - [[package]] name = "gcp_auth" version = "0.12.3" @@ -933,15 +853,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "getopts" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" -dependencies = [ - "unicode-width 0.1.14", -] - [[package]] name = "getrandom" version = "0.2.15" @@ -1047,18 +958,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "html5ever" -version = "0.29.1" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c" -dependencies = [ - "log", - "mac", - "markup5ever", - "match_token", -] - [[package]] name = "http" version = "1.3.1" @@ -1530,37 +1429,6 @@ version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" -[[package]] -name = "mac" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" - -[[package]] -name = "markup5ever" -version = "0.14.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7a7213d12e1864c0f002f52c2923d4556935a43dec5e71355c2760e0f6e7a18" -dependencies = [ - "log", - "phf", - "phf_codegen", - "string_cache", - "string_cache_codegen", - "tendril", -] - -[[package]] -name = "match_token" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.100", -] - [[package]] name = "matchers" version = "0.1.0" @@ -1641,12 +1509,6 @@ dependencies = [ "tempfile", ] -[[package]] -name = "new_debug_unreachable" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" - [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -1823,58 +1685,6 @@ dependencies = [ "indexmap 2.9.0", ] -[[package]] -name = "phf" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" -dependencies = [ - "phf_macros", - "phf_shared", -] - -[[package]] -name = "phf_codegen" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" -dependencies = [ - "phf_generator", - "phf_shared", -] - -[[package]] -name = "phf_generator" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" -dependencies = [ - "phf_shared", - "rand 0.8.5", -] - -[[package]] -name = "phf_macros" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" -dependencies = [ - "phf_generator", - "phf_shared", - "proc-macro2", - "quote", - "syn 2.0.100", -] - -[[package]] -name = "phf_shared" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" -dependencies = [ - "siphasher", -] - [[package]] name = "pin-project" version = "1.1.10" @@ -1934,12 +1744,6 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "precomputed-hash" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" - [[package]] name = "prettyplease" version = "0.2.32" @@ -2496,21 +2300,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "scraper" -version = "0.23.1" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "527e65d9d888567588db4c12da1087598d0f6f8b346cc2c5abc91f05fc2dffe2" -dependencies = [ - "cssparser", - "ego-tree", - "getopts", - "html5ever", - "precomputed-hash", - "selectors", - "tendril", -] - [[package]] name = "security-framework" version = "2.11.1" @@ -2547,25 +2336,6 @@ dependencies = [ "libc", ] -[[package]] -name = "selectors" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" -dependencies = [ - "bitflags 2.9.0", - "cssparser", - "derive_more", - "fxhash", - "log", - "new_debug_unreachable", - "phf", - "phf_codegen", - "precomputed-hash", - "servo_arc", - "smallvec", -] - [[package]] name = "serde" version = "1.0.219" @@ -2661,15 +2431,6 @@ dependencies = [ "syn 2.0.100", ] -[[package]] -name = "servo_arc" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae65c4249478a2647db249fb43e23cec56a2c8974a427e7bd8cb5a1d0964921a" -dependencies = [ - "stable_deref_trait", -] - [[package]] name = "sharded-slab" version = "0.1.7" @@ -2694,12 +2455,8 @@ dependencies = [ "futures", "reqwest", "rusqlite", - "scraper", - "serde", - "serde_json", "snix-castore", "tokio", - "tokio-stream", "tokio-util", "url", ] @@ -2713,12 +2470,6 @@ dependencies = [ "libc", ] -[[package]] -name = "siphasher" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" - [[package]] name = "slab" version = "0.4.9" @@ -2837,31 +2588,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" -[[package]] -name = "string_cache" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" -dependencies = [ - "new_debug_unreachable", - "parking_lot", - "phf_shared", - "precomputed-hash", - "serde", -] - -[[package]] -name = "string_cache_codegen" -version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" -dependencies = [ - "phf_generator", - "phf_shared", - "proc-macro2", - "quote", -] - [[package]] name = "strsim" version = "0.11.1" @@ -2950,17 +2676,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "tendril" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" -dependencies = [ - "futf", - "mac", - "utf-8", -] - [[package]] name = "thiserror" version = "1.0.69" @@ -3414,12 +3129,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "utf-8" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" - [[package]] name = "utf16_iter" version = "1.0.5" diff --git a/Cargo.nix b/Cargo.nix index 4cf77bc..22eb2d2 100644 --- a/Cargo.nix +++ b/Cargo.nix @@ -1104,19 +1104,6 @@ rec { }; resolvedDefaultFeatures = [ "default" ]; }; - "byteorder" = rec { - crateName = "byteorder"; - version = "1.5.0"; - edition = "2021"; - sha256 = "0jzncxyf404mwqdbspihyzpkndfgda450l0893pz5xj685cg5l0z"; - authors = [ - "Andrew Gallant " - ]; - features = { - 
"default" = [ "std" ]; - }; - resolvedDefaultFeatures = [ "default" "std" ]; - }; "bytes" = rec { crateName = "bytes"; version = "1.10.1"; @@ -1601,65 +1588,6 @@ rec { "rand_core" = [ "dep:rand_core" ]; }; }; - "cssparser" = rec { - crateName = "cssparser"; - version = "0.34.0"; - edition = "2018"; - sha256 = "1qx3hha392szcl812l6hp0d4029gg8x62cl4nf0byqgdv0f6vimp"; - authors = [ - "Simon Sapin " - ]; - dependencies = [ - { - name = "cssparser-macros"; - packageId = "cssparser-macros"; - } - { - name = "dtoa-short"; - packageId = "dtoa-short"; - } - { - name = "itoa"; - packageId = "itoa"; - } - { - name = "phf"; - packageId = "phf"; - features = [ "macros" ]; - } - { - name = "smallvec"; - packageId = "smallvec"; - } - ]; - features = { - "serde" = [ "dep:serde" ]; - }; - }; - "cssparser-macros" = rec { - crateName = "cssparser-macros"; - version = "0.6.1"; - edition = "2018"; - sha256 = "0cfkzj60avrnskdmaf7f8zw6pp3di4ylplk455zrzaf19ax8id8k"; - procMacro = true; - libName = "cssparser_macros"; - libPath = "lib.rs"; - authors = [ - "Simon Sapin " - ]; - dependencies = [ - { - name = "quote"; - packageId = "quote"; - } - { - name = "syn"; - packageId = "syn 2.0.100"; - features = [ "full" "extra-traits" ]; - } - ]; - - }; "darling" = rec { crateName = "darling"; version = "0.20.11"; @@ -1803,49 +1731,6 @@ rec { }; resolvedDefaultFeatures = [ "alloc" "powerfmt" "serde" "std" ]; }; - "derive_more" = rec { - crateName = "derive_more"; - version = "0.99.19"; - edition = "2018"; - sha256 = "17y6g78dg31fsv7z4p455bzxs670spg476ww2ibg3mj3vww9m8ix"; - procMacro = true; - authors = [ - "Jelte Fennema " - ]; - dependencies = [ - { - name = "proc-macro2"; - packageId = "proc-macro2"; - } - { - name = "quote"; - packageId = "quote"; - } - { - name = "syn"; - packageId = "syn 2.0.100"; - } - ]; - features = { - "convert_case" = [ "dep:convert_case" ]; - "default" = [ "add_assign" "add" "as_mut" "as_ref" "constructor" "deref" "deref_mut" "display" "error" "from" "from_str" "index" "index_mut" "into" "into_iterator" "iterator" "mul_assign" "mul" "not" "sum" "try_into" "is_variant" "unwrap" ]; - "display" = [ "syn/extra-traits" ]; - "error" = [ "syn/extra-traits" ]; - "from" = [ "syn/extra-traits" ]; - "generate-parsing-rs" = [ "peg" ]; - "into" = [ "syn/extra-traits" ]; - "is_variant" = [ "convert_case" ]; - "mul" = [ "syn/extra-traits" ]; - "mul_assign" = [ "syn/extra-traits" ]; - "not" = [ "syn/extra-traits" ]; - "peg" = [ "dep:peg" ]; - "rustc_version" = [ "dep:rustc_version" ]; - "testing-helpers" = [ "rustc_version" ]; - "try_into" = [ "syn/extra-traits" ]; - "unwrap" = [ "convert_case" "rustc_version" ]; - }; - resolvedDefaultFeatures = [ "add" "add_assign" ]; - }; "digest" = rec { crateName = "digest"; version = "0.10.7"; @@ -1925,49 +1810,6 @@ rec { features = { }; }; - "dtoa" = rec { - crateName = "dtoa"; - version = "1.0.10"; - edition = "2018"; - sha256 = "016gid01rarcdv57h049d7nr9daxc2hc2gqzx0mji57krywd7bfn"; - authors = [ - "David Tolnay " - ]; - features = { - "no-panic" = [ "dep:no-panic" ]; - }; - }; - "dtoa-short" = rec { - crateName = "dtoa-short"; - version = "0.3.5"; - edition = "2015"; - sha256 = "11rwnkgql5jilsmwxpx6hjzkgyrbdmx1d71s0jyrjqm5nski25fd"; - libName = "dtoa_short"; - authors = [ - "Xidorn Quan " - ]; - dependencies = [ - { - name = "dtoa"; - packageId = "dtoa"; - } - ]; - - }; - "ego-tree" = rec { - crateName = "ego-tree"; - version = "0.10.0"; - edition = "2021"; - sha256 = "1n2csy99chk5v5vzjl0ff79vxpxhl76xmcb3aj6brrzzipmjz5xj"; - libName = "ego_tree"; - authors = [ - "June 
McEnroe " - "Carlo Federico Vescovo " - ]; - features = { - "serde" = [ "dep:serde" ]; - }; - }; "either" = rec { crateName = "either"; version = "1.15.0"; @@ -2313,26 +2155,6 @@ rec { }; resolvedDefaultFeatures = [ "alloc" "default" "std" ]; }; - "futf" = rec { - crateName = "futf"; - version = "0.1.5"; - edition = "2015"; - sha256 = "0hvqk2r7v4fnc34hvc3vkri89gn52d5m9ihygmwn75l1hhp0whnz"; - authors = [ - "Keegan McAllister " - ]; - dependencies = [ - { - name = "mac"; - packageId = "mac"; - } - { - name = "new_debug_unreachable"; - packageId = "new_debug_unreachable"; - } - ]; - - }; "futures" = rec { crateName = "futures"; version = "0.3.31"; @@ -2611,23 +2433,6 @@ rec { }; resolvedDefaultFeatures = [ "alloc" "async-await" "async-await-macro" "channel" "default" "futures-channel" "futures-io" "futures-macro" "futures-sink" "io" "memchr" "sink" "slab" "std" ]; }; - "fxhash" = rec { - crateName = "fxhash"; - version = "0.2.1"; - edition = "2015"; - sha256 = "037mb9ichariqi45xm6mz0b11pa92gj38ba0409z3iz239sns6y3"; - libPath = "lib.rs"; - authors = [ - "cbreeden " - ]; - dependencies = [ - { - name = "byteorder"; - packageId = "byteorder"; - } - ]; - - }; "gcp_auth" = rec { crateName = "gcp_auth"; version = "0.12.3"; @@ -2760,26 +2565,6 @@ rec { }; resolvedDefaultFeatures = [ "more_lengths" ]; }; - "getopts" = rec { - crateName = "getopts"; - version = "0.2.21"; - edition = "2015"; - sha256 = "1mgb3qvivi26gs6ihqqhh8iyhp3vgxri6vwyrwg28w0xqzavznql"; - authors = [ - "The Rust Project Developers" - ]; - dependencies = [ - { - name = "unicode-width"; - packageId = "unicode-width 0.1.14"; - } - ]; - features = { - "core" = [ "dep:core" ]; - "rustc-dep-of-std" = [ "unicode-width/rustc-dep-of-std" "std" "core" ]; - "std" = [ "dep:std" ]; - }; - }; "getrandom 0.2.15" = rec { crateName = "getrandom"; version = "0.2.15"; @@ -3153,35 +2938,6 @@ rec { ]; }; - "html5ever" = rec { - crateName = "html5ever"; - version = "0.29.1"; - edition = "2021"; - sha256 = "07518h5gbw0c6x7w5br76bgxvgphs6zlrb4q7ii7bg1ww7510x1v"; - authors = [ - "The html5ever Project Developers" - ]; - dependencies = [ - { - name = "log"; - packageId = "log"; - } - { - name = "mac"; - packageId = "mac"; - } - { - name = "markup5ever"; - packageId = "markup5ever"; - } - { - name = "match_token"; - packageId = "match_token"; - } - ]; - features = { - }; - }; "http" = rec { crateName = "http"; version = "1.3.1"; @@ -4715,78 +4471,6 @@ rec { }; resolvedDefaultFeatures = [ "std" ]; }; - "mac" = rec { - crateName = "mac"; - version = "0.1.1"; - edition = "2015"; - sha256 = "194vc7vrshqff72rl56f9xgb0cazyl4jda7qsv31m5l6xx7hq7n4"; - authors = [ - "Jonathan Reem " - ]; - - }; - "markup5ever" = rec { - crateName = "markup5ever"; - version = "0.14.1"; - edition = "2021"; - sha256 = "063sdq7hwxn2al9ygify8dd96mj57n9c4lig007lr1p128yj39y7"; - libPath = "lib.rs"; - authors = [ - "The html5ever Project Developers" - ]; - dependencies = [ - { - name = "log"; - packageId = "log"; - } - { - name = "phf"; - packageId = "phf"; - } - { - name = "string_cache"; - packageId = "string_cache"; - } - { - name = "tendril"; - packageId = "tendril"; - } - ]; - buildDependencies = [ - { - name = "phf_codegen"; - packageId = "phf_codegen"; - } - { - name = "string_cache_codegen"; - packageId = "string_cache_codegen"; - } - ]; - - }; - "match_token" = rec { - crateName = "match_token"; - version = "0.1.0"; - edition = "2021"; - sha256 = "0sx3212vkjqfblfhr556ayabbjflbigjf5j591j9kgs4infniac8"; - procMacro = true; - dependencies = [ - { - name = "proc-macro2"; - packageId = 
"proc-macro2"; - } - { - name = "quote"; - packageId = "quote"; - } - { - name = "syn"; - packageId = "syn 2.0.100"; - features = [ "full" ]; - } - ]; - - }; "matchers" = rec { crateName = "matchers"; version = "0.1.0"; @@ -5033,18 +4717,6 @@ rec { "vendored" = [ "openssl/vendored" ]; }; }; - "new_debug_unreachable" = rec { - crateName = "new_debug_unreachable"; - version = "1.0.6"; - edition = "2021"; - sha256 = "11phpf1mjxq6khk91yzcbd3ympm78m3ivl7xg6lg2c0lf66fy3k5"; - libName = "debug_unreachable"; - authors = [ - "Matt Brubeck " - "Jonathan Reem " - ]; - - }; "nu-ansi-term" = rec { crateName = "nu-ansi-term"; version = "0.46.0"; @@ -5608,142 +5280,6 @@ rec { "unstable" = [ "generate" ]; }; }; - "phf" = rec { - crateName = "phf"; - version = "0.11.3"; - edition = "2021"; - sha256 = "0y6hxp1d48rx2434wgi5g8j1pr8s5jja29ha2b65435fh057imhz"; - authors = [ - "Steven Fackler " - ]; - dependencies = [ - { - name = "phf_macros"; - packageId = "phf_macros"; - optional = true; - } - { - name = "phf_shared"; - packageId = "phf_shared"; - usesDefaultFeatures = false; - } - ]; - features = { - "default" = [ "std" ]; - "macros" = [ "phf_macros" ]; - "phf_macros" = [ "dep:phf_macros" ]; - "serde" = [ "dep:serde" ]; - "std" = [ "phf_shared/std" ]; - "uncased" = [ "phf_shared/uncased" ]; - "unicase" = [ "phf_macros?/unicase" "phf_shared/unicase" ]; - }; - resolvedDefaultFeatures = [ "default" "macros" "phf_macros" "std" ]; - }; - "phf_codegen" = rec { - crateName = "phf_codegen"; - version = "0.11.3"; - edition = "2021"; - sha256 = "0si1n6zr93kzjs3wah04ikw8z6npsr39jw4dam8yi9czg2609y5f"; - authors = [ - "Steven Fackler " - ]; - dependencies = [ - { - name = "phf_generator"; - packageId = "phf_generator"; - } - { - name = "phf_shared"; - packageId = "phf_shared"; - } - ]; - - }; - "phf_generator" = rec { - crateName = "phf_generator"; - version = "0.11.3"; - edition = "2021"; - crateBin = []; - sha256 = "0gc4np7s91ynrgw73s2i7iakhb4lzdv1gcyx7yhlc0n214a2701w"; - authors = [ - "Steven Fackler " - ]; - dependencies = [ - { - name = "phf_shared"; - packageId = "phf_shared"; - usesDefaultFeatures = false; - } - { - name = "rand"; - packageId = "rand 0.8.5"; - usesDefaultFeatures = false; - features = [ "small_rng" ]; - } - ]; - features = { - "criterion" = [ "dep:criterion" ]; - }; - }; - "phf_macros" = rec { - crateName = "phf_macros"; - version = "0.11.3"; - edition = "2021"; - sha256 = "05kjfbyb439344rhmlzzw0f9bwk9fp95mmw56zs7yfn1552c0jpq"; - procMacro = true; - authors = [ - "Steven Fackler " - ]; - dependencies = [ - { - name = "phf_generator"; - packageId = "phf_generator"; - } - { - name = "phf_shared"; - packageId = "phf_shared"; - usesDefaultFeatures = false; - } - { - name = "proc-macro2"; - packageId = "proc-macro2"; - } - { - name = "quote"; - packageId = "quote"; - } - { - name = "syn"; - packageId = "syn 2.0.100"; - features = [ "full" ]; - } - ]; - features = { - "unicase" = [ "unicase_" "phf_shared/unicase" ]; - "unicase_" = [ "dep:unicase_" ]; - }; - }; - "phf_shared" = rec { - crateName = "phf_shared"; - version = "0.11.3"; - edition = "2021"; - sha256 = "1rallyvh28jqd9i916gk5gk2igdmzlgvv5q0l3xbf3m6y8pbrsk7"; - authors = [ - "Steven Fackler " - ]; - dependencies = [ - { - name = "siphasher"; - packageId = "siphasher"; - } - ]; - features = { - "default" = [ "std" ]; - "uncased" = [ "dep:uncased" ]; - "unicase" = [ "dep:unicase" ]; - }; - resolvedDefaultFeatures = [ "default" "std" ]; - }; "pin-project" = rec { crateName = "pin-project"; version = "1.1.10"; @@ -5861,17 +5397,6 @@ rec { }; 
resolvedDefaultFeatures = [ "simd" "std" ]; }; - "precomputed-hash" = rec { - crateName = "precomputed-hash"; - version = "0.1.1"; - edition = "2015"; - sha256 = "075k9bfy39jhs53cb2fpb9klfakx2glxnf28zdw08ws6lgpq6lwj"; - libName = "precomputed_hash"; - authors = [ - "Emilio Cobos Álvarez " - ]; - - }; "prettyplease" = rec { crateName = "prettyplease"; version = "0.2.32"; @@ -7960,56 +7485,6 @@ rec { "default" = [ "use_std" ]; }; }; - "scraper" = rec { - crateName = "scraper"; - version = "0.23.1"; - edition = "2021"; - crateBin = []; - sha256 = "1qpz5py0a7y9mg2w4v1lidphz3arhw8dl4jcvf47aml8v3cnazjj"; - authors = [ - "June McEnroe " - ]; - dependencies = [ - { - name = "cssparser"; - packageId = "cssparser"; - } - { - name = "ego-tree"; - packageId = "ego-tree"; - } - { - name = "getopts"; - packageId = "getopts"; - optional = true; - } - { - name = "html5ever"; - packageId = "html5ever"; - } - { - name = "precomputed-hash"; - packageId = "precomputed-hash"; - } - { - name = "selectors"; - packageId = "selectors"; - } - { - name = "tendril"; - packageId = "tendril"; - } - ]; - features = { - "default" = [ "main" "errors" ]; - "deterministic" = [ "indexmap" ]; - "getopts" = [ "dep:getopts" ]; - "indexmap" = [ "dep:indexmap" ]; - "main" = [ "getopts" ]; - "serde" = [ "dep:serde" ]; - }; - resolvedDefaultFeatures = [ "default" "errors" "getopts" "main" ]; - }; "security-framework 2.11.1" = rec { crateName = "security-framework"; version = "2.11.1"; @@ -8132,69 +7607,6 @@ rec { }; resolvedDefaultFeatures = [ "OSX_10_10" "OSX_10_11" "OSX_10_12" "OSX_10_9" "default" ]; }; - "selectors" = rec { - crateName = "selectors"; - version = "0.26.0"; - edition = "2021"; - sha256 = "1s3zv30rqgdvil7mnfr4xq5nb9m8yp0sai42l28y565mkd68lmpx"; - libPath = "lib.rs"; - authors = [ - "The Servo Project Developers" - ]; - dependencies = [ - { - name = "bitflags"; - packageId = "bitflags 2.9.0"; - } - { - name = "cssparser"; - packageId = "cssparser"; - } - { - name = "derive_more"; - packageId = "derive_more"; - usesDefaultFeatures = false; - features = [ "add" "add_assign" ]; - } - { - name = "fxhash"; - packageId = "fxhash"; - } - { - name = "log"; - packageId = "log"; - } - { - name = "new_debug_unreachable"; - packageId = "new_debug_unreachable"; - } - { - name = "phf"; - packageId = "phf"; - } - { - name = "precomputed-hash"; - packageId = "precomputed-hash"; - } - { - name = "servo_arc"; - packageId = "servo_arc"; - } - { - name = "smallvec"; - packageId = "smallvec"; - } - ]; - buildDependencies = [ - { - name = "phf_codegen"; - packageId = "phf_codegen"; - } - ]; - features = { - "to_shmem" = [ "dep:to_shmem" "dep:to_shmem_derive" ]; - }; - }; "serde" = rec { crateName = "serde"; version = "1.0.219"; @@ -8539,26 +7951,6 @@ rec { features = { }; }; - "servo_arc" = rec { - crateName = "servo_arc"; - version = "0.4.0"; - edition = "2021"; - sha256 = "06ljch4isnnbv1xpwhjajz4a4mpc7ki47ys9n9yn98kqjhjc8rdf"; - libPath = "lib.rs"; - authors = [ - "The Servo Project Developers" - ]; - dependencies = [ - { - name = "stable_deref_trait"; - packageId = "stable_deref_trait"; - } - ]; - features = { - "serde" = [ "dep:serde" ]; - "servo" = [ "serde" "track_alloc_size" ]; - }; - }; "sharded-slab" = rec { crateName = "sharded-slab"; version = "0.1.7"; @@ -8629,14 +8021,6 @@ rec { name = "rusqlite"; packageId = "rusqlite"; } - { - name = "scraper"; - packageId = "scraper"; - } - { - name = "serde_json"; - packageId = "serde_json"; - } { name = "snix-castore"; packageId = "snix-castore"; @@ -8674,23 +8058,6 @@ rec { ]; }; - 
"siphasher" = rec { - crateName = "siphasher"; - version = "1.0.1"; - edition = "2018"; - sha256 = "17f35782ma3fn6sh21c027kjmd227xyrx06ffi8gw4xzv9yry6an"; - authors = [ - "Frank Denis " - ]; - features = { - "default" = [ "std" ]; - "serde" = [ "dep:serde" ]; - "serde_json" = [ "dep:serde_json" ]; - "serde_no_std" = [ "serde/alloc" ]; - "serde_std" = [ "std" "serde/std" ]; - }; - resolvedDefaultFeatures = [ "default" "std" ]; - }; "slab" = rec { crateName = "slab"; version = "0.4.9"; @@ -9110,75 +8477,7 @@ rec { "default" = [ "std" ]; "std" = [ "alloc" ]; }; - resolvedDefaultFeatures = [ "alloc" "default" "std" ]; - }; - "string_cache" = rec { - crateName = "string_cache"; - version = "0.8.9"; - edition = "2018"; - sha256 = "03z7km2kzlwiv2r2qifq5riv4g8phazwng9wnvs3py3lzainnxxz"; - authors = [ - "The Servo Project Developers" - ]; - dependencies = [ - { - name = "new_debug_unreachable"; - packageId = "new_debug_unreachable"; - } - { - name = "parking_lot"; - packageId = "parking_lot"; - } - { - name = "phf_shared"; - packageId = "phf_shared"; - } - { - name = "precomputed-hash"; - packageId = "precomputed-hash"; - } - { - name = "serde"; - packageId = "serde"; - optional = true; - } - ]; - features = { - "default" = [ "serde_support" ]; - "malloc_size_of" = [ "dep:malloc_size_of" ]; - "serde" = [ "dep:serde" ]; - "serde_support" = [ "serde" ]; - }; - resolvedDefaultFeatures = [ "default" "serde" "serde_support" ]; - }; - "string_cache_codegen" = rec { - crateName = "string_cache_codegen"; - version = "0.5.4"; - edition = "2018"; - sha256 = "181ir4d6y053s1kka2idpjx5g9d9jgll6fy517jhzzpi2n3r44f7"; - libPath = "lib.rs"; - authors = [ - "The Servo Project Developers" - ]; - dependencies = [ - { - name = "phf_generator"; - packageId = "phf_generator"; - } - { - name = "phf_shared"; - packageId = "phf_shared"; - } - { - name = "proc-macro2"; - packageId = "proc-macro2"; - } - { - name = "quote"; - packageId = "quote"; - } - ]; - + resolvedDefaultFeatures = [ "alloc" ]; }; "strsim" = rec { crateName = "strsim"; @@ -9419,35 +8718,6 @@ rec { }; resolvedDefaultFeatures = [ "default" "getrandom" ]; }; - "tendril" = rec { - crateName = "tendril"; - version = "0.4.3"; - edition = "2015"; - sha256 = "1c3vip59sqwxn148i714nmkrvjzbk7105vj0h92s6r64bw614jnj"; - authors = [ - "Keegan McAllister " - "Simon Sapin " - "Chris Morgan " - ]; - dependencies = [ - { - name = "futf"; - packageId = "futf"; - } - { - name = "mac"; - packageId = "mac"; - } - { - name = "utf-8"; - packageId = "utf-8"; - } - ]; - features = { - "encoding" = [ "dep:encoding" ]; - "encoding_rs" = [ "dep:encoding_rs" ]; - }; - }; "thiserror 1.0.69" = rec { crateName = "thiserror"; version = "1.0.69"; @@ -11055,17 +10325,6 @@ rec { }; resolvedDefaultFeatures = [ "default" "std" ]; }; - "utf-8" = rec { - crateName = "utf-8"; - version = "0.7.6"; - edition = "2015"; - sha256 = "1a9ns3fvgird0snjkd3wbdhwd3zdpc2h5gpyybrfr6ra5pkqxk09"; - libName = "utf8"; - authors = [ - "Simon Sapin " - ]; - - }; "utf16_iter" = rec { crateName = "utf16_iter"; version = "1.0.5"; diff --git a/Cargo.toml b/Cargo.toml index a75186e..9b6b61b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,11 +9,7 @@ clap = "4.5.35" futures = "0.3.31" reqwest = "0.12.15" rusqlite = "0.34.0" -scraper = "0.23.1" -serde = "1.0.219" -serde_json = "1.0.140" snix-castore = { version = "0.1.0", git = "https://git.snix.dev/snix/snix.git" } tokio = "1.44.2" -tokio-stream = "0.1.17" tokio-util = "0.7.14" url = "2.5.4" diff --git a/README.md b/README.md index 1e072bb..073d99a 100644 --- a/README.md 
+++ b/README.md @@ -1,8 +1,7 @@ -[sidx](https://forge.someonex.net/else/sidx) +sidx === -Work in Progress. Indexing archives and build outputs. @@ -18,8 +17,4 @@ Roadmap Approach --- -Vapourware and means to an end: -[this project](https://forge.someonex.net/else/sidx) was originally motivated by the needs of maintaining -`cudaPackages` in Nixpkgs. -Specifically, it attempts to answer the question of "what is there to be maintained", -improve [observability and debug-ability of the package set (cf. demo)](https://cuda-index.someonex.net/sidx/UriReference). +Vapourware and means to an end. diff --git a/default.nix b/default.nix index 01ecf46..4ae9a1a 100644 --- a/default.nix +++ b/default.nix @@ -47,118 +47,5 @@ lib.makeScope pkgs.newScope ( } ); sidx = self.sidx-crate2nix.rootCrate.build; - datasette-wrapped = self.callPackage ( - { - datasette, - datasette-assets, - makeWrapper, - runCommand, - }: - runCommand "datasettew" - { - nativeBuildInputs = [ makeWrapper ]; - preferLocalBuild = true; - allowSubstitutes = false; - } - '' - mkdir -p "$out/bin" - makeWrapper ${lib.getExe datasette} "$out/bin/datasettew" \ - --append-flags --metadata=${datasette-assets}/metadata.json \ - --append-flags --static=static:${datasette-assets}/static - '' - ) { }; - datasette-assets = self.callPackage ( - { - runCommand, - datasette-metadata, - datasette-settings, - }: - runCommand "datasette-assets" - { - preferLocalBuild = true; - allowSubstitutes = false; - } - '' - mkdir "$out" - cp --no-preserve=mode -r ${./static} "$out"/static - cp ${datasette-metadata} "$out"/metadata.json - cp ${datasette-settings} "$out"/settings.json - '' - ) { }; - datasette-settings = self.callPackage ( - { formats }: - (formats.json { }).generate "datasette-settings.json" { - sql_time_limit_ms = 8000; - } - ) { }; - datasette-metadata = self.callPackage ( - { formats }: - (formats.json { }).generate "datasette-metadata.json" { - title = "CUDA INDEX"; - description_html = '' -
-            Visualizing the contents of Nixpkgs' cudaPackages.
-            Generated via an ad-hoc indexing tool.
- ''; - "extra_css_urls" = [ - "/static/some.css" - ]; - "databases" = { - "sidx" = { - "tables" = { - "Hash" = { - "label_column" = "hash"; - }; - "CudaArtifact" = { - facets = [ - "pname" - "platform" - ]; - }; - }; - queries.cuda_conflicts = { - title = "CudaArtifact Conflicts"; - description_html = '' - CudaArtifacts (identified by sha256) - claiming the same (pname, version, platform) triple - ''; - sql = '' - SELECT - COUNT(DISTINCT sha256) AS conflicts, - pname.str AS pname, - ver.str AS ver, - plat.str AS plat, - GROUP_CONCAT(name.str, char(10)) AS name, - GROUP_CONCAT(tag.str, char(10)) AS tag, - GROUP_CONCAT(h.hash, char(10)) AS sha256 - FROM - ( - CudaArtifact AS cc, - Str AS name, - Str AS pname, - Str as ver, - Str as plat, - Hash as h - ON cc.name=name.id - AND cc.pname=pname.id - AND cc.version = ver.id - AND cc.platform = plat.id - AND cc.sha256 = h.id - ) - LEFT JOIN Str AS tag - ON - cc.compat_tag=tag.id - GROUP BY - cc.pname, cc.version, cc.platform - HAVING - conflicts >= CAST(:min_conflicts AS INTEGER) - ORDER BY conflicts DESC - ''; - }; - }; - }; - - } - ) { }; } ) diff --git a/shell.nix b/shell.nix index 50c74c7..0336a60 100644 --- a/shell.nix +++ b/shell.nix @@ -2,20 +2,17 @@ npins ? import ./npins, nixpkgs ? npins.nixpkgs, pkgs ? import nixpkgs { }, - self ? import ./. { inherit nixpkgs pkgs npins; }, lib ? pkgs.lib, mkShell ? pkgs.mkShell, sqlite ? pkgs.sqlite, openssl ? pkgs.openssl, rust-analyzer ? pkgs.rust-analyzer, rustc ? pkgs.rustc, - rustfmt ? pkgs.rustfmt, cargo ? pkgs.cargo, pkg-config ? pkgs.pkg-config, crate2nix ? pkgs.crate2nix, protobuf ? pkgs.protobuf, - datasette-wrapped ? self.datasette-wrapped, - datasette-assets ? self.datasette-assets, + datasette ? pkgs.datasette, ... }: mkShell { @@ -25,19 +22,13 @@ mkShell { cargo crate2nix rustc - rustfmt rust-analyzer pkg-config protobuf - datasette-wrapped + datasette ]; buildInputs = [ openssl sqlite ]; - DATASETTE_ASSETS = datasette-assets; # uploaded to cuda-index.someonex.net in bulk... 
- shellHook = '' - export DATABASE_PATH="$HOME/.local/share/sidx/sidx.db" - unset out outputs phases - ''; } diff --git a/src/main.rs b/src/main.rs index ec3a3e5..482aae7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,29 +1,11 @@ -use std::collections::{HashMap, HashSet}; -use std::marker::PhantomData; -use std::path::{PathBuf, absolute}; -use std::str::FromStr; -use std::sync::Arc; -use std::{fmt, io}; +use std::path::{absolute, PathBuf}; +use anyhow::anyhow; use anyhow::Context; -use anyhow::{Error, anyhow}; use clap::Parser; -use clap::Subcommand; -use futures::{StreamExt, TryStreamExt, stream}; -use rusqlite::fallible_iterator::FallibleIterator as _; -use rusqlite::{OptionalExtension, named_params, params}; -use scraper::{Html, Selector}; -use serde::de::{self, Visitor}; -use serde::{Deserialize, Serialize}; -use snix_castore::B3Digest; -use snix_castore::blobservice::BlobService; -use snix_castore::directoryservice::DirectoryService; +use futures::{stream, StreamExt, TryStreamExt}; +use rusqlite::{params, OptionalExtension}; use snix_castore::{blobservice, directoryservice, import::fs::ingest_path}; -use std::sync::Mutex; -use tokio::io::{AsyncReadExt, BufReader}; -use tokio::sync::Semaphore; -use tokio::sync::mpsc::{Sender, channel}; -use tokio_stream::wrappers::ReceiverStream; use url::Url; #[derive(Clone, Debug)] @@ -32,33 +14,20 @@ enum Ingestable { Path(PathBuf), } -#[derive(Debug, Clone)] -enum SampledWhen { +#[derive(Debug)] +enum IngestedWhen { Now, Before, } -#[derive(Debug, Clone)] -struct SizedBlob { - hash: B3Digest, - n_bytes: u64, -} - -#[derive(Debug, Clone)] +#[derive(Debug)] #[allow(dead_code)] -struct Sampled { +struct Ingested { sample_id: u32, uri: String, - blob: Option, - http_status: Option, + blake3: String, epoch: u32, - when: SampledWhen, -} - -#[derive(Clone)] -enum FetchListingMessage { - Sampled(Url, Sampled), - Recurse(Url, usize, Sender), + when: IngestedWhen, } impl std::fmt::Display for Ingestable { @@ -75,7 +44,7 @@ impl std::fmt::Display for Ingestable { } } -fn parse_url_or_path(s: &str) -> Result { +fn parse_url_or_path(s: &str) -> Result { if s.is_empty() { Err(anyhow!("Empty path (url)")) } else if s.starts_with("./") || s.starts_with("/") { @@ -99,7 +68,7 @@ fn parse_url_or_path(s: &str) -> Result { fn data_path() -> PathBuf { let xdg_data_dir = std::env::var("XDG_DATA_DIR") .and_then(|s| Ok(PathBuf::from(s))) - .or_else(|_| -> Result { + .or_else(|_| -> Result { match std::env::home_dir() { Some(p) => Ok(p.join(".local/share")), None => Err(anyhow!("...")), // FIXME @@ -117,38 +86,15 @@ fn default_db_path() -> PathBuf { data_path().join("sidx.db") } -#[derive(Subcommand)] -enum Command { - Ingest { - #[clap(value_parser = parse_url_or_path, num_args = 1)] - inputs: Vec, - }, - FetchListing { - #[clap(value_parser, long, default_value_t = 5)] - max_depth: usize, - #[clap(value_parser, long, default_value_t = 1024 * 1024)] - html_max_bytes: u64, - #[clap(value_parser, num_args = 1)] - inputs: Vec, - }, - ParseUrl { - #[clap(value_parser, num_args = 1)] - url: Vec, - }, - DemoCudaManifest, - FormatCudaManifest, - ProcessCudaManifests { - #[clap(short, long, action)] - include_finished: bool, - }, -} - #[derive(Parser)] struct Cli { + #[clap(value_parser = parse_url_or_path, num_args = 1)] + inputs: Vec, + #[clap(short, long, action)] refetch: bool, - #[clap(short, long, value_parser, default_value_t = 2)] + #[clap(short, long, value_parser, default_value_t = 5)] max_parallel: usize, #[clap(short, long, value_parser, default_value_os_t = 
default_db_path())] @@ -156,43 +102,22 @@ struct Cli { #[clap(short, long, value_parser, default_value_os_t = default_castore_path())] castore_path: PathBuf, - - #[command(subcommand)] - command: Option, } -struct SidxContext -where - BS: blobservice::BlobService + Clone + Send + 'static, - DS: directoryservice::DirectoryService + Clone + Send + 'static, -{ - refetch: bool, - max_parallel: usize, - http: reqwest::Client, - http_semaphore: Arc, - con: Arc>, - blob_service: BS, - dir_service: DS, -} +#[tokio::main] +async fn main() { + let args = Cli::parse(); -async fn open_context( - refetch: bool, - max_parallel: usize, - db_path: PathBuf, - castore_path: PathBuf, -) -> SidxContext, Arc> { - if let Some(p) = db_path.parent() { + args.db_path.parent().and_then(|p| { let _ = std::fs::create_dir_all(p); - } + Some(()) + }); - let con = rusqlite::Connection::open(&db_path).expect("Failed to construct Database object"); - con.pragma_update(None, "jorunal_mode", "wal").unwrap(); - con.pragma_update(None, "synchronous", "normal").unwrap(); - con.pragma_update(None, "temp_store", "memory").unwrap(); - con.pragma_update(None, "foreign_keys", "on").unwrap(); - con.execute_batch(include_str!("q/sidx-init.sql")) - .expect("Failed to execute sidx-init.sql"); - let castore_path = absolute(castore_path).expect("Failed to canonicalize castore_path"); + let con = + rusqlite::Connection::open(&args.db_path).expect("Failed to construct Database object"); + con.execute_batch(include_str!("q/init.sql")) + .expect("Failed to execute init.sql"); + let castore_path = absolute(args.castore_path).expect("Failed to canonicalize castore_path"); let blob_service = blobservice::from_addr(&std::format!( "objectstore+file://{}", castore_path @@ -205,857 +130,116 @@ async fn open_context( let dir_service = directoryservice::from_addr(&std::format!( "objectstore+file://{}", castore_path - .join("directories") + .join("directory") .to_str() .expect("Path::to_str unexpectedly broken") )) .await - .expect("Couldn't initialize .castore/directories"); + .expect("Couldn't initialize .castore/directory"); - SidxContext::, Arc> { - refetch, - max_parallel, - http: reqwest::Client::new(), - http_semaphore: Arc::new(Semaphore::new(max_parallel)), - con: Arc::new(Mutex::new(con)), - blob_service, - dir_service, - } -} - -impl Drop for SidxContext -where - BS: BlobService + Clone, - DS: DirectoryService + Clone, -{ - fn drop(&mut self) { - let con = self - .con - .lock() - .expect("Acquiring mutex for sqlite to run #pragma optimize before exit"); - con.pragma_update(None, "analysis_limit", 500).unwrap(); - con.pragma_query(None, "optimize", |_| Ok(())).unwrap(); - } -} - -impl SidxContext { - async fn latest_sample(&self, uri: &str) -> Result, Error> { - let lock = self.con.lock().unwrap(); - let mut find_sample = lock - .prepare_cached(include_str!("q/latest-download.sql")) + let client = reqwest::Client::new(); + let samples = stream::iter(args.inputs.iter().map(|uri| { + let client = &client; + let blob_service = &blob_service; + let dir_service = &dir_service; + let con = &con; + let mut find_sample = con + .prepare(include_str!("q/latest-download.sql")) .expect("Failed to prepare latest-download.sql"); - find_sample - .query_row(params![uri], |r| { - <(u32, String, u64, Option, u32)>::try_from(r) - }) - .optional() - .context("db_latest_download.sql") - .and_then(|maybe_tuple| match maybe_tuple { - Some((sample_id, hash, n_bytes, http_code, epoch)) => Ok(Some(Sampled { - sample_id, - uri: uri.to_string(), - blob: Some(SizedBlob 
{ - hash: B3Digest::from_str(&hash)?, - n_bytes, - }), - http_status: http_code, - epoch, - when: SampledWhen::Before, - })), - None => Ok(None), - }) - } - async fn db_add_sample( - &self, - uri: &str, - hash: &Option, - http_code: &Option, - content_type: &Option, - ) -> Result<(u32, u32), Error> { - let lock = self.con.lock().expect("Locking mutex for db_add_sample"); - let mut add_sample = lock - .prepare_cached(include_str!("q/add-sample.sql")) - .context("Failed to prepare add-sample.sql")?; - Ok(add_sample.query_row( - named_params! { - ":uri": uri, - ":hash": hash, - ":http_code": http_code, - ":content_type": content_type - }, - |row| <(u32, u32)>::try_from(row), - )?) - } - async fn db_add_blob(&self, hash: &str, n_bytes: u64) -> Result { - let lock = self.con.lock().expect("db_add_blob: couldn't lock mutex?"); - let mut add_blob = lock - .prepare_cached(include_str!("q/upsert-blob.sql")) - .context("Failed to prepare upsert-blob.sql")?; - Ok(add_blob.execute(params![hash, n_bytes,])?) - } - async fn db_add_uri(&self, uri: &str) -> Result { - let lock = self.con.lock().unwrap(); - let mut add_uri = lock - .prepare_cached(include_str!("q/upsert-uri.sql")) - .context("Failed to prepare upsert-uri.sql")?; + let mut add_sample = con + .prepare(include_str!("q/add-sample.sql")) + .expect("Failed to prepare add-sample.sql"); + let mut add_blob = con + .prepare(include_str!("q/upsert-blob.sql")) + .expect("Failed to prepare upsert-blob.sql"); + let mut add_uri = con + .prepare(include_str!("q/upsert-uri.sql")) + .expect("Failed to prepare upsert-uri.sql"); - Ok(add_uri.execute(params![uri])?) - } - async fn record_ingested_node( - &self, - uri: &str, - blob: &Option, - http_code: Option, - content_type: Option, - ) -> Result { - let digest64 = if let Some(SizedBlob { hash, n_bytes }) = blob { - let digest64 = format!("{}", hash); - self.db_add_blob(&digest64, n_bytes.clone()).await?; - Some(digest64) - } else { - None - }; - self.db_add_uri(&uri).await?; - let (sample_id, epoch) = self - .db_add_sample(&uri, &digest64, &http_code, &content_type) - .await?; - Ok(Sampled { - sample_id, - uri: uri.to_string(), - blob: blob.clone(), - http_status: http_code, - epoch, - when: SampledWhen::Now, - }) - } - async fn download(&self, uri: &Url) -> Result { - let _permit = self.http_semaphore.acquire().await.unwrap(); - eprintln!("Downloading {:?}", uri.to_string()); - let uri_s = uri.to_string(); - let res = self - .http - .get(uri.clone()) - .send() - .await - .context(format!("Request::send failed early for {:?}", uri))?; - - let status = res.status(); - let status_code = status.as_u16(); - let content_type = res - .headers() - .get(reqwest::header::CONTENT_TYPE) - .and_then(|x| x.to_str().ok()) - .map(|x| x.to_string()); - - if status.is_success() { - let mut r = tokio_util::io::StreamReader::new( - res.bytes_stream().map_err(std::io::Error::other), - ); - let mut w = self.blob_service.open_write().await; - let n_bytes = match tokio::io::copy(&mut r, &mut w).await { - Ok(n) => n, - Err(e) => { - return Err(anyhow!( - "tokio::io::copy failed for uri={} with {}", - uri_s, - e - )); - } - }; - let digest = w.close().await?; - - self.record_ingested_node( - &uri_s, - &Some(SizedBlob { - hash: digest, - n_bytes, - }), - Some(status_code), - content_type, - ) - .await - } else { - self.record_ingested_node(&uri_s, &None, Some(status_code), content_type) - .await - } - } - async fn ensure_blob(&self, hash: &B3Digest) -> Result<(), Error> { - if self - .blob_service - .has(hash) - .await - 
.context("ensure_has() accessing blob_service")? - { - Ok(()) - } else { - let b64 = hash.to_string(); - let uris = { - let con = self.con.lock().unwrap(); - let mut find_uris = con - .prepare_cached(include_str!("q/uris-of-hash.sql")) - .context("Preparing statement: q/uris-of-hash.sql") - .unwrap(); - find_uris - .query(named_params! {":hash": b64, ":limit": 100})? - .map(|b| b.get(0)) - .collect::>()? - }; - if uris.is_empty() { - return Err(anyhow!("No uris recorded for {}", b64)); - }; - for uri in uris { - let url = match Url::parse(&uri) { - Ok(url) => url, - Err(_) => continue, - }; - match self - .download(&url) - .await - .context("Redownloading missing blob for ensure_hash") - { - Ok(Sampled { - sample_id: _, - uri: _, - blob, - http_status: _, - epoch: _, - when: _, - }) => { - if blob.map_or(false, |sb| sb.hash == *hash) { - return Ok(()); - } else { - continue; - } - } - Err(_) => { - continue; - } + async move { + let uri_s = uri.to_string(); + let latest_download = find_sample + .query_row(params![uri_s], |r| <(u32, String, u32)>::try_from(r)) + .optional()?; + if let Some((sample_id, blake3, epoch)) = latest_download { + if !args.refetch { + return Ok::, anyhow::Error>(Some(Ingested { + sample_id, + uri: uri_s, + blake3, + epoch, + when: IngestedWhen::Before, + })); } } - Err(anyhow!( - "All uris for {} are out of date (result in errors or different hashes)", - b64 - )) - } - } - async fn ensure_sampled_uri(&self, uri: &Url) -> Result { - /* TODO: flatten */ - if self.refetch { - self.download(&uri).await - } else { - /* TODO: Add negative TTL */ - match self.latest_sample(&uri.to_string()).await? { - Some(ingested) => match ingested.blob.clone() { - Some(SizedBlob { hash, n_bytes: _ }) => { - if self.blob_service.has(&hash).await? { - Ok(ingested) - } else { - self.download(&uri).await - } - } - None => self.download(&uri).await, - }, - None => self.download(&uri).await, - } - } - } - async fn ingest(&self, inputs: &Vec) -> Vec, Error>> { - let samples = stream::iter(inputs.iter().map(|uri| { - let blob_service = &self.blob_service; - let dir_service = &self.dir_service; - - async move { - let uri_s = uri.to_string(); - let latest_download = self.latest_sample(&uri_s).await?; - if latest_download.is_some() { - return Ok(latest_download); - } - match uri { - Ingestable::Path(path) => { - match ingest_path::<_, _, _, &[u8]>(&blob_service, &dir_service, path, None) - .await? 
- { - snix_castore::Node::Directory { digest, size } => self - .record_ingested_node( - &uri_s, - &Some(SizedBlob { - hash: digest, - n_bytes: size, - }), - None, - None, - ) - .await - .map(Some), - - snix_castore::Node::File { - digest, - size, - executable: _, - } => self - .record_ingested_node( - &uri_s, - &Some(SizedBlob { - hash: digest, - n_bytes: size, - }), - None, - None, - ) - .await - .map(Some), - snix_castore::Node::Symlink { target: _ } => { - Err(anyhow!("TODO: Figure out what to do with symlink roots")) - } - } - } - Ingestable::Url(url) => self.ensure_sampled_uri(url).await.map(Some), - } - } - })) - .buffer_unordered(self.max_parallel) - .collect::, _>>>() - .await; - - samples - } - - fn extract_hrefs(content: &str) -> Result, Error> { - let sel = Selector::parse("a").map_err(|e| anyhow!(e.to_string()))?; - let html = Html::parse_document(&content); - - Ok(html - .select(&sel) - .flat_map(|elt| elt.value().attr("href")) - .map(|s| s.to_string()) - .collect::>()) - } - - async fn fetch_from_listing_impl( - self: Arc, - url: Url, - max_depth: usize, - html_max_bytes: u64, - tx: Sender, - ) -> Result<(), Error> { - let maybe_root = self.ensure_sampled_uri(&url).await; - if let Err(ref e) = maybe_root { - eprintln!("Couldn't download {}: {:?}", url, e); - }; - let root = maybe_root?; - tx.send(FetchListingMessage::Sampled(url.clone(), root.clone())) - .await - .context("Stopped accepting tasks before processing an Ingested notification")?; - if max_depth <= 0 { - return Ok(()); - } - - match root.blob { - None => Err(anyhow!( - "Couldn't download {}. Status code: {:?}", - url, - root.http_status - )), - Some(SizedBlob { hash, n_bytes }) => { - if n_bytes > html_max_bytes { - return Ok(()); - } - match self.blob_service.open_read(&hash).await? { - Some(mut reader) => { - let content = { - let mut br = BufReader::new(&mut *reader); - let mut content = String::new(); - br.read_to_string(&mut content).await?; - content - }; - let hrefs = Self::extract_hrefs(&content).unwrap_or(vec![]); - /* max_depth > 0 here */ - for href in hrefs.clone() { - let next_url = url.join(&href).context("Constructing next_url")?; - tx.send(FetchListingMessage::Recurse( - next_url.clone(), - max_depth - 1, - tx.clone(), - )) - .await - .context("Stopped accepting tasks before finishing all hrefs")?; - } - { - let lock = self.con.lock().expect("Couldn't acquire Mutex?"); - for href in hrefs { - let mut stmt = - lock.prepare_cached(include_str!("q/add-str.sql"))?; - stmt.execute(params!["href"])?; - - let next_url = url.join(&href).context("Constructing next_url")?; - let mut stmt = - lock.prepare_cached(include_str!("q/add-uri-ref.sql"))?; - let digest64 = hash.to_string(); - stmt.execute(named_params! {":source": digest64, ":target": next_url.to_string(), ":why": "href"})?; - } - }; - Ok(()) - } - None => Err(anyhow!("Couldn't read the ingested blob")), - } - } - } - } - - async fn fetch_from_listing( - self: Arc, - url: Url, - max_depth: usize, - html_max_bytes: u64, - ) -> ReceiverStream { - let mq_size = 10; - - /* TODO: move task queue to e.g. 
sqlite */ - let (tx, mut rx) = channel(mq_size); - - let (out_tx, out_rx) = channel(mq_size); - - tokio::spawn({ - async move { - let mut seen: HashSet = HashSet::new(); - { - let tx_moved = tx; - tx_moved - .send(FetchListingMessage::Recurse( - url, - max_depth, - tx_moved.clone(), - )) - .await - .expect("fetch_from_listing failed populating the queue"); - }; - while let Some(m) = rx.recv().await { - match m { - FetchListingMessage::Sampled(_url, ingested) => { - out_tx - .send(ingested) - .await - .expect("ReceiverStream failed to accept an Ingestable"); - } - FetchListingMessage::Recurse(url, max_depth, tx) => { - if max_depth > 0 && !seen.contains(&url.to_string()) { - seen.insert(url.to_string()); - tokio::spawn({ - let s = self.clone(); - let url = url.clone(); - async move { - s.fetch_from_listing_impl( - url, - max_depth, - html_max_bytes, - tx, - ) - .await - } - }); - } - } - } - } - } - }); - ReceiverStream::new(out_rx) - } -} - -fn string_or_int<'de, T, D>(deserializer: D) -> Result -where - T: Deserialize<'de> + TryFrom + FromStr, - D: serde::Deserializer<'de>, -{ - struct StringOrInt(PhantomData T>); - - impl<'de, T> Visitor<'de> for StringOrInt - where - T: Deserialize<'de> + TryFrom + FromStr, - { - type Value = T; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("string or int") - } - - fn visit_u64(self, value: u64) -> Result - where - E: de::Error, - { - T::try_from(value).map_err(|_e| de::Error::custom("ignored error")) - } - - fn visit_str(self, value: &str) -> Result - where - E: de::Error, - { - FromStr::from_str(value).map_err(de::Error::custom) - } - } - - deserializer.deserialize_any(StringOrInt(PhantomData)) -} - -#[derive(Serialize, Deserialize, Debug, Clone)] -struct CudaArtifact { - relative_path: String, - sha256: String, - md5: Option, - - // Tha manifests export size as string instead of number - #[serde(deserialize_with = "string_or_int")] - size: i64, -} - -#[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(untagged)] -enum CudaArtifactsByTag { - Single(CudaArtifact), - Many { - #[serde(flatten)] - by_tag: HashMap, - }, -} -impl IntoIterator for CudaArtifactsByTag { - type Item = (Option, CudaArtifact); - type IntoIter = std::vec::IntoIter; - - fn into_iter(self) -> std::vec::IntoIter { - match self { - CudaArtifactsByTag::Single(art) => vec![(None, art)].into_iter(), - CudaArtifactsByTag::Many { by_tag: by_compat } => by_compat - .iter() - .map(|(k, x)| (Some(k.clone()), x.clone())) - .collect::>() - .into_iter(), - } - } -} -#[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(untagged)] -enum CudaArtifactsByPlatform { - Binary { - #[serde(flatten)] - by_platform: HashMap, - }, - Source { - source: CudaArtifact, - }, -} - -impl IntoIterator for CudaArtifactsByPlatform { - type Item = (String, Option, CudaArtifact); - - /* TODO: Figure out which is the trait that doesn't involve copying */ - type IntoIter = std::vec::IntoIter<(String, Option, CudaArtifact)>; - - fn into_iter(self) -> Self::IntoIter { - match self { - CudaArtifactsByPlatform::Binary { by_platform } => by_platform - .iter() - .flat_map(|(platform, by_tag)| { - by_tag - .clone() - .into_iter() - .map(|(tag, artifact)| (platform.clone(), tag.clone(), artifact)) - }) - .collect::>() - .into_iter(), - CudaArtifactsByPlatform::Source { source } => { - (vec![("source".to_string(), None, source)]).into_iter() - } - } - } -} - -#[derive(Serialize, Deserialize, Debug)] -struct CudaJsonPackage { - name: Option, - license: String, - 
license_path: Option<String>,
-    version: String,
-
-    cuda_variant: Option<Vec<String>>,
-
-    #[serde(flatten)]
-    artifacts: CudaArtifactsByPlatform,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-struct CudaJsonManifest {
-    release_date: Option<String>,
-    release_label: Option<String>,
-    release_product: Option<String>,
-
-    #[serde(flatten)]
-    by_pname: HashMap<String, CudaJsonPackage>,
-}
-
-#[tokio::main]
-async fn main() {
-    let args = Cli::parse();
-
-    let _cwd = std::env::current_dir().expect("Couldn't get CWD");
-    let _host_name = std::env::var("HOSTNAME").map_or(None, Some);
-
-    let ctx = Arc::new(
-        open_context(
-            args.refetch,
-            args.max_parallel,
-            args.db_path,
-            args.castore_path,
-        )
-        .await,
-    );
-
-    match args.command {
-        Some(Command::Ingest { inputs }) => {
-            let samples = ctx.ingest(&inputs).await;
-            for s in samples {
-                match s {
-                    Err(e) => {
-                        eprintln!("Failed to fetch: {}", e);
-                    }
-                    Ok(None) => {}
-                    Ok(Some(ingested)) => {
-                        eprintln!("{:?}", ingested)
-                    }
-                }
-            }
-        }
-        Some(Command::FetchListing {
-            max_depth,
-            html_max_bytes,
-            inputs,
-        }) => {
-            let ingested: Vec = stream::iter(inputs)
-                .then(async |i| {
-                    let i = i.clone();
-                    ctx.clone()
-                        .fetch_from_listing(i, max_depth, html_max_bytes)
-                        .await
-                })
-                .flatten_unordered(args.max_parallel)
-                .collect()
-                .await;
-            for i in ingested {
-                eprintln!("{:?}", i);
-            }
-        }
-        Some(Command::ParseUrl { url: urls }) => {
-            for url in urls {
-                println!("{:?}", url);
-            }
-        }
-        Some(Command::FormatCudaManifest) => {
-            println!(
-                "{}",
-                serde_json::to_string(
-                    &serde_json::from_reader::<_, CudaJsonManifest>(io::stdin()).unwrap()
-                )
-                .unwrap()
-            );
-        }
-        Some(Command::DemoCudaManifest) => {
-            println!(
-                "{}",
-                serde_json::to_string(&CudaJsonManifest {
-                    release_date: Some("1984-01-01".to_string()),
-                    release_label: Some("8.9.x".to_string()),
-                    release_product: Some("cudnn".to_string()),
-                    by_pname: HashMap::from([
-                        (
-                            "cudnn".to_string(),
-                            CudaJsonPackage {
-                                name: Some("cuDNN Library".to_string()),
-                                license: "cudnn".to_string(),
-                                license_path: Some("bar/foo".to_string()),
-                                version: "8.9.7.6".to_string(),
-                                cuda_variant: Some(vec!["11".to_string(), "12".to_string()]),
-                                artifacts: CudaArtifactsByPlatform::Binary {
-                                    by_platform: HashMap::from([(
-                                        "x86_64-linux".to_string(),
-                                        CudaArtifactsByTag::Many {
-                                            by_tag: HashMap::from([(
-                                                "cuda11".to_string(),
-                                                CudaArtifact {
-                                                    relative_path: "kek".to_string(),
-                                                    sha256: "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824".to_string(),
-                                                    md5: Some("5d41402abc4b2a76b9719d911017c592".to_string()),
-                                                    size: 5,
-                                                },
-                                            )]),
-                                        },
-                                    )]),
-                                },
-                            },
-                        ),
-                        (
-                            "cuda_samples".to_string(),
-                            CudaJsonPackage {
-                                name: Some("NVIDIA cuDNN samples".to_string()),
-                                license: "cudnn".to_string(),
-                                license_path: Some("foo/bar".to_string()),
-                                version: "8.9.7.6".to_string(),
-                                cuda_variant: None,
-                                artifacts: CudaArtifactsByPlatform::Source {
-                                    source: CudaArtifact {
-                                        relative_path: "/biba/boba/fifa".to_string(),
-                                        sha256: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855".to_string(),
-                                        md5: Some("d41d8cd98f00b204e9800998ecf8427e".to_string()),
-                                        size: 0,
-                                    },
-                                },
-                            },
-                        ),
-                    ])
-                })
-                .unwrap()
-            );
-        }
-        Some(Command::ProcessCudaManifests { include_finished }) => {
-            let manifests: Vec<(String, String, Option)> = {
-                let con = ctx.con.lock().unwrap();
-                con.execute_batch(include_str!("q/cuda-init.sql"))
-                    .context("q/cuda-init.sql")
-                    .unwrap();
-                let mut find_manifests = con
-                    .prepare_cached(include_str!("q/find-cuda-manifests.sql"))
-                    .context("q/find-cuda-manifests.sql")
-                    .unwrap();
-                find_manifests
-                    .query(named_params! {":include_finished": include_finished})
-                    .context("q/find-cuda-manifests.sql")
-                    .unwrap()
-                    .map(|row| <(String, String, Option)>::try_from(row))
-                    .collect()
-                    .expect("Casting result of q/find-cuda-manifests.sql")
-            };
-            for m in &manifests {
-                let b64 = m.1.clone();
-                let b3 = match B3Digest::from_str(&b64) {
-                    Ok(b3) => b3,
-                    Err(e) => {
-                        eprintln!("Invalid hash recorded for {:?}: {}", m, e);
-                        continue;
-                    }
-                };
-                if let Err(e) = ctx.ensure_blob(&b3).await {
-                    eprintln!("Couldn't provision the blob for {:?}: {}", m, e);
-                    continue;
-                };
-                let json = {
-                    let mut reader = match ctx.blob_service.open_read(&b3).await {
-                        Ok(Some(reader)) => reader,
-                        Ok(None) => {
-                            eprintln!("Blob doesn't exist after ensure_blob: {:?}", m);
-                            continue;
-                        }
-                        Err(e) => {
-                            eprintln!("Couldn't query the blob for {:?}: {}", m, e);
-                            continue;
-                        }
-                    };
-                    let mut json = String::new();
-                    match reader.read_to_string(&mut json).await {
-                        Ok(_) => (),
-                        Err(e) => {
-                            eprintln!("Couldn't read blob {:?}: {:?}", m, e);
-                            continue;
-                        }
-                    };
-                    json
-                };
-                let parsed: CudaJsonManifest = match serde_json::from_str(&json) {
-                    Ok(m) => m,
-                    Err(e) => {
-                        eprintln!("Couldn't parse JSON for {:?}: {:?}", m, e);
-                        continue;
-                    }
-                };
-                {
-                    let mut lock = ctx.con.lock().unwrap();
-                    let tx = lock.transaction().unwrap();
+        let (digest, n_bytes) = match uri {
+            Ingestable::Path(path) => {
+                match ingest_path::<_, _, _, &[u8]>(&blob_service, &dir_service, path, None)
+                    .await?
                 {
-                        let mut add_str = tx
-                            .prepare_cached(include_str!("q/add-str.sql"))
-                            .context("q/add-str.sql")
-                            .unwrap();
-                        let mut add_hash = tx
-                            .prepare_cached(include_str!("q/upsert-blob.sql"))
-                            .context("q/upsert-blob.sql")
-                            .unwrap();
-                        let mut add_manifest = tx
-                            .prepare_cached(include_str!("q/add-cuda-manifest.sql"))
-                            .context("q/add-cuda-manifest.sql")
-                            .unwrap();
-                        let mut add_comp = tx
-                            .prepare_cached(include_str!("q/add-cuda-artifact.sql"))
-                            .context("q/add-cuda-artifact.sql")
-                            .unwrap();
-
-                        add_hash.execute(params![b64, None::]).unwrap();
-                        for s in vec![
-                            &parsed.release_date,
-                            &parsed.release_label,
-                            &parsed.release_product,
-                        ] {
-                            add_str.execute((s,)).unwrap();
-                        }
-                        add_manifest
-                            .execute(named_params! {
-                                ":hash": b64,
-                                ":release_date": parsed.release_date,
-                                ":release_label": parsed.release_label,
-                                ":release_product": parsed.release_product,
-                            })
-                            .context("Executing q/add-cuda-manifest.sql")
-                            .unwrap();
-
-                        for (pname, pkg) in parsed.by_pname {
-                            for (platform, maybe_tag, comp) in pkg.artifacts.into_iter() {
-                                let ps = named_params! {
-                                    ":manifest": b64,
-                                    ":name": pkg.name,
-                                    ":pname": pname,
-                                    ":license_name": pkg.license,
-                                    ":license_path": pkg.license_path,
-                                    ":version": pkg.version,
-                                    ":sha256": comp.sha256,
-                                    ":md5": comp.md5,
-                                    ":platform": platform,
-                                    ":relative_path": comp.relative_path,
-                                    ":n_bytes": comp.size,
-                                    ":compat_tag": maybe_tag
-                                };
-                                for h in &vec![Some(&comp.sha256), comp.md5.as_ref()] {
-                                    add_hash.execute(params![h, None::]).unwrap();
-                                }
-                                for s in &vec![
-                                    Some(&pname),
-                                    pkg.name.as_ref(),
-                                    Some(&pkg.license),
-                                    pkg.license_path.as_ref(),
-                                    Some(&pkg.version),
-                                    Some(&platform.to_string()),
-                                    Some(&comp.relative_path),
-                                    maybe_tag.as_ref(),
-                                ] {
-                                    add_str.execute(params![s]).unwrap();
-                                }
-                                add_comp
-                                    .execute(ps)
-                                    .context("Executing q/add-cuda-artifact.sql")
-                                    .unwrap();
-                            }
+                    snix_castore::Node::Directory { digest, size } => (digest, size),
+                    snix_castore::Node::File {
+                        digest,
+                        size,
+                        executable: _,
+                    } => (digest, size),
+                    snix_castore::Node::Symlink { target: _ } => {
+                        return Err(anyhow!("TODO: Figure out what to do with symlink roots"))
+                    }
                 }
             }
-                tx.commit()
-                    .expect("Couldn't commit transaction adding manifest or its component");
-            }
+            Ingestable::Url(url) => {
+                let res = client
+                    .get(url.clone())
+                    .send()
+                    .await
+                    .context(format!("Request.send failed early for {:?}", uri))?
+                    .error_for_status()?;
+                let mut r = tokio_util::io::StreamReader::new(
+                    res.bytes_stream().map_err(std::io::Error::other),
+                );
+                let mut w = blob_service.open_write().await;
+                let n_bytes = match tokio::io::copy(&mut r, &mut w).await {
+                    Ok(n) => n,
+                    Err(e) => {
+                        return Err(anyhow!(
+                            "tokio::io::copy failed for uri={} with {}",
+                            uri_s,
+                            e
+                        ));
+                    }
+                };
+                let digest = w.close().await?;
+                (digest, n_bytes)
+            }
+        };
+        let digest64 = format!("{}", digest);
+        add_blob.execute(params![digest64, n_bytes,])?;
+        add_uri.execute(params![uri_s])?;
+        let (sample_id, epoch) = add_sample
+            .query_row(params![uri_s, digest64], |row| <(u32, u32)>::try_from(row))?;
+        Ok(Some(Ingested {
+            sample_id,
+            uri: uri_s,
+            blake3: digest64,
+            epoch,
+            when: IngestedWhen::Now,
+        }))
+    }
+    }))
+    .buffer_unordered(args.max_parallel)
+    .collect::<Vec<Result<Option<Ingested>, _>>>()
+    .await;
+
+    for s in samples {
+        match s {
+            Err(e) => {
+                println!("Failed to fetch: {}", e);
+            }
+            Ok(None) => {}
+            Ok(Some(ingested)) => {
+                println!("{:?}", ingested)
             }
         }
-        None => {}
     }
 }
diff --git a/src/q/add-cuda-artifact.sql b/src/q/add-cuda-artifact.sql
deleted file mode 100644
index 2b860f6..0000000
--- a/src/q/add-cuda-artifact.sql
+++ /dev/null
@@ -1,17 +0,0 @@
-INSERT INTO
-  CudaArtifact(manifest, sha256, md5, name, pname, license_name, license_path, version, platform, compat_tag, relative_path, n_bytes)
-VALUES (
-  (SELECT id FROM Hash WHERE hash=:manifest LIMIT 1),
-  (SELECT id FROM Hash WHERE hash=:sha256 LIMIT 1),
-  (SELECT id FROM Hash WHERE hash=:md5 LIMIT 1),
-  (SELECT id FROM Str WHERE str=:name LIMIT 1),
-  (SELECT id FROM Str WHERE str=:pname LIMIT 1),
-  (SELECT id FROM Str WHERE str=:license_name LIMIT 1),
-  (SELECT id FROM Str WHERE str=:license_path LIMIT 1),
-  (SELECT id FROM Str WHERE str=:version LIMIT 1),
-  (SELECT id FROM Str WHERE str=:platform LIMIT 1),
-  (SELECT id FROM Str WHERE str=:compat_tag LIMIT 1),
-  (SELECT id FROM Str WHERE str=:relative_path LIMIT 1),
-  :n_bytes
-)
-ON CONFLICT DO NOTHING
diff --git a/src/q/add-cuda-manifest.sql b/src/q/add-cuda-manifest.sql
deleted file mode 100644
index 02e8b1c..0000000
--- a/src/q/add-cuda-manifest.sql
+++ /dev/null
@@ -1,9 +0,0 @@
-INSERT INTO
-  CudaManifest(id, release_date, release_label, release_product)
-VALUES (
-  (SELECT id FROM Hash WHERE hash=:hash LIMIT 1),
-  (SELECT id FROM Str WHERE str=:release_date LIMIT 1),
-  (SELECT id FROM Str WHERE str=:release_label LIMIT 1),
-  (SELECT id FROM Str WHERE str=:release_product LIMIT 1)
-)
-ON CONFLICT DO NOTHING
diff --git a/src/q/add-sample.sql b/src/q/add-sample.sql
index 71131ce..f26b3ad 100644
--- a/src/q/add-sample.sql
+++ b/src/q/add-sample.sql
@@ -1,8 +1,21 @@
-INSERT INTO SidxUriSample(uri, hash, http_code, content_type)
-VALUES (
-    ( SELECT id FROM Str WHERE str = :uri LIMIT 1),
-    ( SELECT id FROM Hash WHERE hash = :hash LIMIT 1 ),
-    :http_code,
-    ( SELECT id FROM Str WHERE str = :content_type LIMIT 1)
+INSERT INTO sidx_uri_sample(uri_id, blake3_id)
+VALUES(
+    (
+        SELECT
+            id
+        FROM
+            sidx_uri
+        WHERE
+            uri = ?
+        LIMIT 1
+    ),
+    (
+        SELECT
+            id
+        FROM
+            sidx_blake3
+        WHERE
+            blake3 = ?
+    )
 )
 RETURNING id, epoch;
diff --git a/src/q/add-str.sql b/src/q/add-str.sql
deleted file mode 100644
index feae104..0000000
--- a/src/q/add-str.sql
+++ /dev/null
@@ -1,5 +0,0 @@
-INSERT INTO Str(str)
-VALUES
-(?)
-ON CONFLICT DO NOTHING;
-
diff --git a/src/q/add-uri-ref.sql b/src/q/add-uri-ref.sql
deleted file mode 100644
index 798310a..0000000
--- a/src/q/add-uri-ref.sql
+++ /dev/null
@@ -1,7 +0,0 @@
-INSERT INTO UriReference(content, target, why)
-VALUES (
-  (SELECT id FROM Hash WHERE hash=:source LIMIT 1),
-  (SELECT id FROM Str WHERE str=:target LIMIT 1),
-  (SELECT id FROM Str WHERE str=:why LIMIT 1)
-)
-ON CONFLICT DO UPDATE SET why=excluded.why;
diff --git a/src/q/cuda-init.sql b/src/q/cuda-init.sql
deleted file mode 100644
index 4c7d29d..0000000
--- a/src/q/cuda-init.sql
+++ /dev/null
@@ -1,49 +0,0 @@
-CREATE TABLE IF NOT EXISTS CudaManifest(
-    id INTEGER,              /* Blake3/ca-node of the JSON */
-    release_date INTEGER,    /* E.g. "2025-03-06" */
-    release_label INTEGER,   /* E.g. "12.8.1" */
-    release_product INTEGER, /* E.g. "cuda" */
-    PRIMARY KEY(id),
-    FOREIGN KEY(id) REFERENCES Hash(id),
-    FOREIGN KEY(release_date) REFERENCES Str(id),
-    FOREIGN KEY(release_label) REFERENCES Str(id),
-    FOREIGN KEY(release_product) REFERENCES Str(id)
-) STRICT;
-
-CREATE TABLE IF NOT EXISTS CudaArtifact(
-    manifest INTEGER NOT NULL,
-    name INTEGER,             /* E.g. "cuda_nvcc" */
-    pname INTEGER,            /* E.g. "CUDA NVCC" */
-    license_name INTEGER,     /* E.g. "CUDA Toolkit" */
-    license_path INTEGER,     /* E.g. "cuda_cccl/LICENSE.txt" */
-    version INTEGER NOT NULL, /* E.g. "12.8.90" */
-    /* Consider making external */
-    compat_tag INTEGER,       /* E.g. "cuda12" in cudnn */
-    sha256 INTEGER,
-    md5 INTEGER,
-    platform INTEGER,         /* E.g. "linux-x86_64" */
-    /* E.g. "cuda_cccl/linux-x86_64/cuda_cccl-linux-x86_64-12.8.90-archive.tar.xz" */
-    relative_path INTEGER,
-    n_bytes INTEGER,          /* May be a string in the JSON */
-    /* Tempting to have
-     * PRIMARY KEY(manifest, name, platform),
-     * however that's not unique at least because of `compat_tag`,
-     * which might also be `NULL`.
-     */
-    PRIMARY KEY(sha256, manifest),
-    FOREIGN KEY(manifest) REFERENCES CudaManifest(id),
-    FOREIGN KEY(manifest) REFERENCES Hash(id),
-    FOREIGN KEY(name) REFERENCES Str(id),
-    FOREIGN KEY(pname) REFERENCES Str(id),
-    FOREIGN KEY(license_name) REFERENCES Str(id),
-    FOREIGN KEY(license_path) REFERENCES Str(id),
-    FOREIGN KEY(version) REFERENCES Str(id),
-    FOREIGN KEY(compat_tag) REFERENCES Str(id),
-    FOREIGN KEY(sha256) REFERENCES Hash(id),
-    FOREIGN KEY(md5) REFERENCES Hash(id),
-    FOREIGN KEY(platform) REFERENCES Str(id),
-    FOREIGN KEY(relative_path) REFERENCES Str(id)
-) STRICT;
-
-CREATE UNIQUE INDEX IF NOT EXISTS CudaArtifactIdx
-ON CudaArtifact(pname, platform, version, compat_tag, name, manifest);
diff --git a/src/q/find-cuda-manifests.sql b/src/q/find-cuda-manifests.sql
deleted file mode 100644
index 6074454..0000000
--- a/src/q/find-cuda-manifests.sql
+++ /dev/null
@@ -1,15 +0,0 @@
-SELECT
-    uri.str AS uri, h.hash, cm.id AS manifest
-FROM
-    SidxUriSample AS s
-    INNER JOIN Str AS uri
-    INNER JOIN (Hash AS h LEFT JOIN CudaManifest AS cm ON h.id=cm.id)
-ON
-    s.uri=uri.id
-    AND s.hash=h.id
-WHERE
-    uri.str LIKE 'https://developer.download.nvidia.com/compute/%.json'
-    AND (:include_finished OR cm.id IS NULL)
-GROUP BY
-    s.hash
-ORDER BY uri.str, s.id DESC;
diff --git a/src/q/init.sql b/src/q/init.sql
new file mode 100644
index 0000000..94bd769
--- /dev/null
+++ b/src/q/init.sql
@@ -0,0 +1,22 @@
+CREATE TABLE IF NOT EXISTS sidx_uri(
+    id INTEGER,
+    uri TEXT UNIQUE,
+    PRIMARY KEY(id)
+);
+CREATE TABLE IF NOT EXISTS sidx_blake3(
+    id INTEGER,
+    blake3 TEXT UNIQUE, /* snix-castore node */
+    n_bytes INTEGER NOT NULL,
+    PRIMARY KEY(id)
+);
+CREATE TABLE IF NOT EXISTS sidx_uri_sample(
+    id INTEGER,
+    uri_id INTEGER NOT NULL,
+    blake3_id INTEGER,
+    epoch INTEGER NOT NULL DEFAULT (unixepoch()),
+    PRIMARY KEY(id),
+    FOREIGN KEY(uri_id) REFERENCES sidx_uri(id),
+    FOREIGN KEY(blake3_id) REFERENCES sidx_blake3(id)
+);
+CREATE INDEX IF NOT EXISTS sidx_uri_blake3_idx
+ON sidx_uri_sample(uri_id, blake3_id, epoch);
diff --git a/src/q/latest-download.sql b/src/q/latest-download.sql
index 0161664..a0e6938 100644
--- a/src/q/latest-download.sql
+++ b/src/q/latest-download.sql
@@ -1,19 +1,16 @@
 SELECT
     s.id AS sample_id,
-    h.hash,
-    h.n_bytes,
-    s.http_code,
+    b.blake3,
     s.epoch
 FROM
-    SidxUriSample AS s,
-    Str AS u,
-    Hash AS h
+    sidx_uri_sample AS s,
+    sidx_uri AS u,
+    sidx_blake3 AS b
 ON
-    s.uri = u.id
-    AND s.hash = h.id
+    s.uri_id = u.id
+    AND s.blake3_id = b.id
 WHERE
-    u.str = ?
-    AND s.hash IS NOT NULL
+    u.uri = ?
 ORDER BY s.epoch DESC
 LIMIT 1;
diff --git a/src/q/sidx-init.sql b/src/q/sidx-init.sql
deleted file mode 100644
index 5cf2492..0000000
--- a/src/q/sidx-init.sql
+++ /dev/null
@@ -1,37 +0,0 @@
-CREATE TABLE IF NOT EXISTS Hash(
-    id INTEGER,
-    hash TEXT UNIQUE, /* snix-castore node */
-    n_bytes INTEGER,
-    PRIMARY KEY(id)
-) STRICT; /* Essentially random strings */
-CREATE TABLE IF NOT EXISTS Str(
-    id INTEGER,
-    str TEXT UNIQUE,
-    PRIMARY KEY(id)
-) STRICT; /* "Naturally occuring" strings */
-CREATE TABLE IF NOT EXISTS SidxUriSample(
-    id INTEGER,
-    uri INTEGER NOT NULL,
-    hash INTEGER,
-    epoch INTEGER NOT NULL DEFAULT (unixepoch()),
-    http_code INTEGER DEFAULT NULL,
-    content_type INTEGER DEFAULT NULL,
-    PRIMARY KEY(id),
-    FOREIGN KEY(uri) REFERENCES Str(id),
-    FOREIGN KEY(hash) REFERENCES Hash(id),
-    FOREIGN KEY(content_type) REFERENCES Str(id)
-) STRICT;
-CREATE INDEX IF NOT EXISTS SidxUriHashIdx
-ON SidxUriSample(uri, hash, epoch);
-
-CREATE TABLE IF NOT EXISTS "UriReference" (
-    "id" INTEGER,
-    "content" INTEGER NOT NULL,
-    "target" INTEGER NOT NULL,
-    "why" INTEGER,
-    PRIMARY KEY("id"),
-    CONSTRAINT "NoDupRefs" UNIQUE("content","target","why"),
-    FOREIGN KEY("content") REFERENCES "Hash"("id"),
-    FOREIGN KEY("target") REFERENCES "Str"("id"),
-    FOREIGN KEY("why") REFERENCES "Str"("id")
-) STRICT;
diff --git a/src/q/upsert-blob.sql b/src/q/upsert-blob.sql
index 3589e33..66fb7ec 100644
--- a/src/q/upsert-blob.sql
+++ b/src/q/upsert-blob.sql
@@ -1,4 +1,4 @@
-INSERT INTO Hash(hash, n_bytes)
+INSERT INTO sidx_blake3(blake3, n_bytes)
 VALUES
 (?, ?)
 ON CONFLICT DO NOTHING;
diff --git a/src/q/upsert-uri.sql b/src/q/upsert-uri.sql
index 8702c5a..555ede8 100644
--- a/src/q/upsert-uri.sql
+++ b/src/q/upsert-uri.sql
@@ -1,4 +1,4 @@
-INSERT INTO Str(str)
+INSERT INTO sidx_uri(uri)
 VALUES
 (?)
 ON CONFLICT DO NOTHING;
diff --git a/src/q/uris-of-hash.sql b/src/q/uris-of-hash.sql
deleted file mode 100644
index 827fdeb..0000000
--- a/src/q/uris-of-hash.sql
+++ /dev/null
@@ -1,16 +0,0 @@
-SELECT
-    uri.str AS uri
-FROM
-    SidxUriSample AS s
-    INNER JOIN Str AS uri
-    INNER JOIN Hash AS h
-ON
-    s.uri=uri.id
-    AND s.hash=h.id
-WHERE
-    h.hash=:hash
-ORDER BY
-    s.epoch DESC
-LIMIT
-    :limit
-;
diff --git a/static/some.css b/static/some.css
deleted file mode 100644
index 5d4ca51..0000000
--- a/static/some.css
+++ /dev/null
@@ -1,48 +0,0 @@
-.index {
-    font-family: "Source Serif Pro", "Linux Libertine", monospace;
-}
-
-.db-table > h3 {
-    font-variant: small-caps;
-}
-
-h1, nav {
-    font-variant: small-caps;
-    font-family: "Inconsolata", monospace;
-}
-
-h2 {
-    font-variant: small-caps;
-}
-
-th {
-    font-variant: small-caps;
-}
-
-header {
-    background-color: black;
-}
-
-.ft {
-    background-color: black;
-}
-
-form input[type="submit"] {
-    background-color: black;
-}
-
-a:link {
-    color: #404040;
-}
-
-a:visited {
-    color: darkgrey;
-}
-
-a:hover {
-    color: black;
-}
-
-.rows-and-columns > tbody > tr:nth-child(even) {
-    background-color: #F5F5F5;
-}