diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57ad90d --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.direnv +result +result-* +target diff --git a/npins/default.nix b/npins/default.nix new file mode 100644 index 0000000..5e7d086 --- /dev/null +++ b/npins/default.nix @@ -0,0 +1,80 @@ +# Generated by npins. Do not modify; will be overwritten regularly +let + data = builtins.fromJSON (builtins.readFile ./sources.json); + version = data.version; + + mkSource = + spec: + assert spec ? type; + let + path = + if spec.type == "Git" then + mkGitSource spec + else if spec.type == "GitRelease" then + mkGitSource spec + else if spec.type == "PyPi" then + mkPyPiSource spec + else if spec.type == "Channel" then + mkChannelSource spec + else + builtins.throw "Unknown source type ${spec.type}"; + in + spec // { outPath = path; }; + + mkGitSource = + { + repository, + revision, + url ? null, + hash, + branch ? null, + ... + }: + assert repository ? type; + # At the moment, either it is a plain git repository (which has an url), or it is a GitHub/GitLab repository + # In the latter case, there we will always be an url to the tarball + if url != null then + (builtins.fetchTarball { + inherit url; + sha256 = hash; # FIXME: check nix version & use SRI hashes + }) + else + assert repository.type == "Git"; + let + urlToName = + url: rev: + let + matched = builtins.match "^.*/([^/]*)(\\.git)?$" repository.url; + + short = builtins.substring 0 7 rev; + + appendShort = if (builtins.match "[a-f0-9]*" rev) != null then "-${short}" else ""; + in + "${if matched == null then "source" else builtins.head matched}${appendShort}"; + name = urlToName repository.url revision; + in + builtins.fetchGit { + url = repository.url; + rev = revision; + inherit name; + # hash = hash; + }; + + mkPyPiSource = + { url, hash, ... }: + builtins.fetchurl { + inherit url; + sha256 = hash; + }; + + mkChannelSource = + { url, hash, ... }: + builtins.fetchTarball { + inherit url; + sha256 = hash; + }; +in +if version == 3 then + builtins.mapAttrs (_: mkSource) data.pins +else + throw "Unsupported format version ${toString version} in sources.json. Try running `npins upgrade`" diff --git a/npins/sources.json b/npins/sources.json new file mode 100644 index 0000000..3733a7c --- /dev/null +++ b/npins/sources.json @@ -0,0 +1,11 @@ +{ + "pins": { + "nixpkgs": { + "type": "Channel", + "name": "nixpkgs-unstable", + "url": "https://releases.nixos.org/nixpkgs/nixpkgs-25.05pre782598.18dd725c2960/nixexprs.tar.xz", + "hash": "1p7kgyph7xkj57p19nbxpycmbchc6d9gwdznsmxhymrzyzi3if21" + } + }, + "version": 3 +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 9ef76b4..482aae7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,16 +1,14 @@ -use std::{ - path::{absolute, PathBuf}, - pin::Pin, -}; +use std::path::{absolute, PathBuf}; use anyhow::anyhow; +use anyhow::Context; use clap::Parser; use futures::{stream, StreamExt, TryStreamExt}; use rusqlite::{params, OptionalExtension}; -use snix_castore::{blobservice, directoryservice}; +use snix_castore::{blobservice, directoryservice, import::fs::ingest_path}; use url::Url; -#[derive(Clone)] +#[derive(Clone, Debug)] enum Ingestable { Url(Url), Path(PathBuf), @@ -143,7 +141,7 @@ async fn main() { let samples = stream::iter(args.inputs.iter().map(|uri| { let client = &client; let blob_service = &blob_service; - let _dir_service = &dir_service; + let dir_service = &dir_service; let con = &con; let mut find_sample = con .prepare(include_str!("q/latest-download.sql")) @@ -174,40 +172,47 @@ async fn main() { })); } } - let mut r: Pin> = { - match uri { - Ingestable::Path(path) => match tokio::fs::File::open(path).await { - Ok(f) => Box::pin(f), - Err(e) => { - return Err(anyhow!("Failed to read {:?}: {}", path, e)); + let (digest, n_bytes) = match uri { + Ingestable::Path(path) => { + match ingest_path::<_, _, _, &[u8]>(&blob_service, &dir_service, path, None) + .await? + { + snix_castore::Node::Directory { digest, size } => (digest, size), + snix_castore::Node::File { + digest, + size, + executable: _, + } => (digest, size), + snix_castore::Node::Symlink { target: _ } => { + return Err(anyhow!("TODO: Figure out what to do with symlink roots")) } - }, - Ingestable::Url(url) => { - let res = match client.get(url.clone()).send().await { - Ok(res) => res.error_for_status()?, - Err(e) => { - return Err(anyhow!("Failed to GET {}: {}", url, e)); - } - }; - let r = tokio_util::io::StreamReader::new( - res.bytes_stream().map_err(std::io::Error::other), - ); - Box::pin(r) } } - }; - let mut w = blob_service.open_write().await; - let n_bytes = match tokio::io::copy(&mut r, &mut w).await { - Ok(n) => n, - Err(e) => { - return Err(anyhow!( - "tokio::io::copy failed for uri={} with {}", - uri_s, - e - )); + Ingestable::Url(url) => { + let res = client + .get(url.clone()) + .send() + .await + .context(format!("Request.send failed early for {:?}", uri))? + .error_for_status()?; + let mut r = tokio_util::io::StreamReader::new( + res.bytes_stream().map_err(std::io::Error::other), + ); + let mut w = blob_service.open_write().await; + let n_bytes = match tokio::io::copy(&mut r, &mut w).await { + Ok(n) => n, + Err(e) => { + return Err(anyhow!( + "tokio::io::copy failed for uri={} with {}", + uri_s, + e + )); + } + }; + let digest = w.close().await?; + (digest, n_bytes) } }; - let digest = w.close().await?; let digest64 = format!("{}", digest); add_blob.execute(params![digest64, n_bytes,])?; add_uri.execute(params![uri_s])?; @@ -229,7 +234,7 @@ async fn main() { for s in samples { match s { Err(e) => { - println!("Failed to fetch ...: {}", e); + println!("Failed to fetch: {}", e); } Ok(None) => {} Ok(Some(ingested)) => { diff --git a/src/q/add-sample.sql b/src/q/add-sample.sql index c28f18c..f26b3ad 100644 --- a/src/q/add-sample.sql +++ b/src/q/add-sample.sql @@ -1,4 +1,4 @@ -INSERT INTO sidx_uri_sample(uri_id, blob_id) +INSERT INTO sidx_uri_sample(uri_id, blake3_id) VALUES( ( SELECT @@ -13,7 +13,7 @@ VALUES( SELECT id FROM - sidx_blob + sidx_blake3 WHERE blake3 = ? ) diff --git a/src/q/init.sql b/src/q/init.sql index 1b70325..94bd769 100644 --- a/src/q/init.sql +++ b/src/q/init.sql @@ -3,20 +3,20 @@ CREATE TABLE IF NOT EXISTS sidx_uri( uri TEXT UNIQUE, PRIMARY KEY(id) ); -CREATE TABLE IF NOT EXISTS sidx_blob( +CREATE TABLE IF NOT EXISTS sidx_blake3( id INTEGER, - blake3 TEXT UNIQUE, + blake3 TEXT UNIQUE, /* snix-castore node */ n_bytes INTEGER NOT NULL, PRIMARY KEY(id) ); CREATE TABLE IF NOT EXISTS sidx_uri_sample( id INTEGER, uri_id INTEGER NOT NULL, - blob_id INTEGER, + blake3_id INTEGER, epoch INTEGER NOT NULL DEFAULT (unixepoch()), PRIMARY KEY(id), FOREIGN KEY(uri_id) REFERENCES sidx_uri(id), - FOREIGN KEY(blob_id) REFERENCES sidx_blob(id) + FOREIGN KEY(blake3_id) REFERENCES sidx_blake3(id) ); -CREATE INDEX IF NOT EXISTS sidx_uri_blob_idx -ON sidx_uri_sample(uri_id, blob_id, epoch); +CREATE INDEX IF NOT EXISTS sidx_uri_blake3_idx +ON sidx_uri_sample(uri_id, blake3_id, epoch); diff --git a/src/q/latest-download.sql b/src/q/latest-download.sql index f444f37..a0e6938 100644 --- a/src/q/latest-download.sql +++ b/src/q/latest-download.sql @@ -5,10 +5,10 @@ SELECT FROM sidx_uri_sample AS s, sidx_uri AS u, - sidx_blob AS b + sidx_blake3 AS b ON s.uri_id = u.id - AND s.blob_id = b.id + AND s.blake3_id = b.id WHERE u.uri = ? ORDER BY diff --git a/src/q/upsert-blob.sql b/src/q/upsert-blob.sql index cb037ef..66fb7ec 100644 --- a/src/q/upsert-blob.sql +++ b/src/q/upsert-blob.sql @@ -1,4 +1,4 @@ -INSERT INTO sidx_blob(blake3, n_bytes) +INSERT INTO sidx_blake3(blake3, n_bytes) VALUES (?, ?) ON CONFLICT DO NOTHING;