diff --git a/Cargo.lock b/Cargo.lock
index dd4233c..107eccf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2695,6 +2695,7 @@ dependencies = [
"reqwest",
"rusqlite",
"scraper",
+ "serde",
"serde_json",
"snix-castore",
"tokio",
diff --git a/Cargo.toml b/Cargo.toml
index f8e09cf..a75186e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,6 +10,7 @@ futures = "0.3.31"
reqwest = "0.12.15"
rusqlite = "0.34.0"
scraper = "0.23.1"
+serde = "1.0.219"
serde_json = "1.0.140"
snix-castore = { version = "0.1.0", git = "https://git.snix.dev/snix/snix.git" }
tokio = "1.44.2"
diff --git a/README.md b/README.md
index 073d99a..1e072bb 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,8 @@
-sidx
+[sidx](https://forge.someonex.net/else/sidx)
===
+Work in Progress.
Indexing archives and build outputs.
@@ -17,4 +18,8 @@ Roadmap
Approach
---
-Vapourware and means to an end.
+Vapourware and means to an end:
+[this project](https://forge.someonex.net/else/sidx) was originally motivated by the needs of maintaining
+`cudaPackages` in Nixpkgs.
+Specifically, it attempts to answer the question of "what is there to be maintained",
+and to improve [observability and debug-ability of the package set (cf. demo)](https://cuda-index.someonex.net/sidx/UriReference).
diff --git a/default.nix b/default.nix
index a822a80..01ecf46 100644
--- a/default.nix
+++ b/default.nix
@@ -50,25 +50,110 @@ lib.makeScope pkgs.newScope (
datasette-wrapped = self.callPackage (
{
datasette,
- datasette-metadata,
+ datasette-assets,
makeWrapper,
runCommand,
}:
- runCommand "datasettew" { nativeBuildInputs = [ makeWrapper ]; } ''
- mkdir -p "$out/bin"
- makeWrapper ${lib.getExe datasette} "$out/bin/datasettew" \
- --add-flags --metadata=${datasette-metadata}
- ''
+ runCommand "datasettew"
+ {
+ nativeBuildInputs = [ makeWrapper ];
+ preferLocalBuild = true;
+ allowSubstitutes = false;
+ }
+ ''
+ mkdir -p "$out/bin"
+ makeWrapper ${lib.getExe datasette} "$out/bin/datasettew" \
+ --append-flags --metadata=${datasette-assets}/metadata.json \
+ --append-flags --static=static:${datasette-assets}/static
+ ''
+ ) { };
+ datasette-assets = self.callPackage (
+ {
+ runCommand,
+ datasette-metadata,
+ datasette-settings,
+ }:
+ runCommand "datasette-assets"
+ {
+ preferLocalBuild = true;
+ allowSubstitutes = false;
+ }
+ ''
+ mkdir "$out"
+ cp --no-preserve=mode -r ${./static} "$out"/static
+ cp ${datasette-metadata} "$out"/metadata.json
+ cp ${datasette-settings} "$out"/settings.json
+ ''
+ ) { };
+ datasette-settings = self.callPackage (
+ { formats }:
+ (formats.json { }).generate "datasette-settings.json" {
+ sql_time_limit_ms = 8000;
+ }
) { };
datasette-metadata = self.callPackage (
{ formats }:
(formats.json { }).generate "datasette-metadata.json" {
+ title = "CUDA INDEX";
+      description_html = ''
+        <p>
+          Visualizing the contents of Nixpkgs' cudaPackages.
+          Generated via an ad-hoc indexing tool.
+        </p>
+      '';
+ "extra_css_urls" = [
+ "/static/some.css"
+ ];
"databases" = {
"sidx" = {
"tables" = {
"Hash" = {
"label_column" = "hash";
};
+ "CudaArtifact" = {
+ facets = [
+ "pname"
+ "platform"
+ ];
+ };
+ };
+ queries.cuda_conflicts = {
+ title = "CudaArtifact Conflicts";
+ description_html = ''
+          <code>CudaArtifact</code>s (identified by <code>sha256</code>)
+          claiming the same <code>(pname, version, platform)</code> triple
+ '';
+ sql = ''
+ SELECT
+ COUNT(DISTINCT sha256) AS conflicts,
+ pname.str AS pname,
+ ver.str AS ver,
+ plat.str AS plat,
+ GROUP_CONCAT(name.str, char(10)) AS name,
+ GROUP_CONCAT(tag.str, char(10)) AS tag,
+ GROUP_CONCAT(h.hash, char(10)) AS sha256
+ FROM
+ (
+ CudaArtifact AS cc,
+ Str AS name,
+ Str AS pname,
+ Str as ver,
+ Str as plat,
+ Hash as h
+ ON cc.name=name.id
+ AND cc.pname=pname.id
+ AND cc.version = ver.id
+ AND cc.platform = plat.id
+ AND cc.sha256 = h.id
+ )
+ LEFT JOIN Str AS tag
+ ON
+ cc.compat_tag=tag.id
+ GROUP BY
+ cc.pname, cc.version, cc.platform
+ HAVING
+ conflicts >= CAST(:min_conflicts AS INTEGER)
+ ORDER BY conflicts DESC
+ '';
};
};
};
diff --git a/shell.nix b/shell.nix
index c168f3f..50c74c7 100644
--- a/shell.nix
+++ b/shell.nix
@@ -9,11 +9,13 @@
openssl ? pkgs.openssl,
rust-analyzer ? pkgs.rust-analyzer,
rustc ? pkgs.rustc,
+ rustfmt ? pkgs.rustfmt,
cargo ? pkgs.cargo,
pkg-config ? pkgs.pkg-config,
crate2nix ? pkgs.crate2nix,
protobuf ? pkgs.protobuf,
datasette-wrapped ? self.datasette-wrapped,
+ datasette-assets ? self.datasette-assets,
...
}:
mkShell {
@@ -23,6 +25,7 @@ mkShell {
cargo
crate2nix
rustc
+ rustfmt
rust-analyzer
pkg-config
protobuf
@@ -32,6 +35,7 @@ mkShell {
openssl
sqlite
];
+ DATASETTE_ASSETS = datasette-assets; # uploaded to cuda-index.someonex.net in bulk...
shellHook = ''
export DATABASE_PATH="$HOME/.local/share/sidx/sidx.db"
unset out outputs phases
diff --git a/src/main.rs b/src/main.rs
index 43f7ee2..ec3a3e5 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,23 +1,28 @@
-use std::collections::HashSet;
-use std::path::{absolute, PathBuf};
+use std::collections::{HashMap, HashSet};
+use std::marker::PhantomData;
+use std::path::{PathBuf, absolute};
use std::str::FromStr;
use std::sync::Arc;
+use std::{fmt, io};
use anyhow::Context;
-use anyhow::{anyhow, Error};
+use anyhow::{Error, anyhow};
use clap::Parser;
use clap::Subcommand;
-use futures::{stream, StreamExt, TryStreamExt};
-use rusqlite::{params, OptionalExtension};
+use futures::{StreamExt, TryStreamExt, stream};
+use rusqlite::fallible_iterator::FallibleIterator as _;
+use rusqlite::{OptionalExtension, named_params, params};
use scraper::{Html, Selector};
+use serde::de::{self, Visitor};
+use serde::{Deserialize, Serialize};
+use snix_castore::B3Digest;
use snix_castore::blobservice::BlobService;
use snix_castore::directoryservice::DirectoryService;
-use snix_castore::B3Digest;
use snix_castore::{blobservice, directoryservice, import::fs::ingest_path};
use std::sync::Mutex;
use tokio::io::{AsyncReadExt, BufReader};
-use tokio::sync::mpsc::{channel, Sender};
use tokio::sync::Semaphore;
+use tokio::sync::mpsc::{Sender, channel};
use tokio_stream::wrappers::ReceiverStream;
use url::Url;
@@ -130,6 +135,12 @@ enum Command {
#[clap(value_parser, num_args = 1)]
        url: Vec<Url>,
},
+ DemoCudaManifest,
+ FormatCudaManifest,
+ ProcessCudaManifests {
+ #[clap(short, long, action)]
+ include_finished: bool,
+ },
}
#[derive(Parser)]
@@ -175,6 +186,10 @@ async fn open_context(
}
let con = rusqlite::Connection::open(&db_path).expect("Failed to construct Database object");
+    con.pragma_update(None, "journal_mode", "wal").unwrap();
+ con.pragma_update(None, "synchronous", "normal").unwrap();
+ con.pragma_update(None, "temp_store", "memory").unwrap();
+ con.pragma_update(None, "foreign_keys", "on").unwrap();
con.execute_batch(include_str!("q/sidx-init.sql"))
.expect("Failed to execute sidx-init.sql");
let castore_path = absolute(castore_path).expect("Failed to canonicalize castore_path");
@@ -190,12 +205,12 @@ async fn open_context(
let dir_service = directoryservice::from_addr(&std::format!(
"objectstore+file://{}",
castore_path
- .join("directory")
+ .join("directories")
.to_str()
.expect("Path::to_str unexpectedly broken")
))
.await
- .expect("Couldn't initialize .castore/directory");
+ .expect("Couldn't initialize .castore/directories");
    SidxContext::<Arc<dyn BlobService>, Arc<dyn DirectoryService>> {
refetch,
@@ -208,8 +223,23 @@ async fn open_context(
}
}
+impl<BS, DS> Drop for SidxContext<BS, DS>
+where
+ BS: BlobService + Clone,
+ DS: DirectoryService + Clone,
+{
+ fn drop(&mut self) {
+ let con = self
+ .con
+ .lock()
+ .expect("Acquiring mutex for sqlite to run #pragma optimize before exit");
+ con.pragma_update(None, "analysis_limit", 500).unwrap();
+ con.pragma_query(None, "optimize", |_| Ok(())).unwrap();
+ }
+}
+
impl<BS: BlobService + Clone, DS: DirectoryService + Clone> SidxContext<BS, DS> {
- async fn db_latest_download(&self, uri: &str) -> Result