Ingestable: use ingest_path for local paths

This way we don't error out when a path is a directory. That said, we're
still only including the root in sidx.db (e.g. manifests/ but not
manifests/*.json). We should change that next.

Also renamed "blob_id" to "blake3_id" because datasette has a special
branch for ${column}_id, treating it as referencing a table that contains ${column}.
This commit is contained in:
Else, Someone 2025-04-20 00:12:51 +00:00
parent 65326b2dcb
commit 56a0b346cd
5 changed files with 53 additions and 48 deletions

View file

@ -1,16 +1,14 @@
use std::{
path::{absolute, PathBuf},
pin::Pin,
};
use std::path::{absolute, PathBuf};
use anyhow::anyhow;
use anyhow::Context;
use clap::Parser;
use futures::{stream, StreamExt, TryStreamExt};
use rusqlite::{params, OptionalExtension};
use snix_castore::{blobservice, directoryservice};
use snix_castore::{blobservice, directoryservice, import::fs::ingest_path};
use url::Url;
#[derive(Clone)]
#[derive(Clone, Debug)]
enum Ingestable {
Url(Url),
Path(PathBuf),
@ -143,7 +141,7 @@ async fn main() {
let samples = stream::iter(args.inputs.iter().map(|uri| {
let client = &client;
let blob_service = &blob_service;
let _dir_service = &dir_service;
let dir_service = &dir_service;
let con = &con;
let mut find_sample = con
.prepare(include_str!("q/latest-download.sql"))
@ -174,40 +172,47 @@ async fn main() {
}));
}
}
let mut r: Pin<Box<dyn tokio::io::AsyncRead>> = {
match uri {
Ingestable::Path(path) => match tokio::fs::File::open(path).await {
Ok(f) => Box::pin(f),
Err(e) => {
return Err(anyhow!("Failed to read {:?}: {}", path, e));
let (digest, n_bytes) = match uri {
Ingestable::Path(path) => {
match ingest_path::<_, _, _, &[u8]>(&blob_service, &dir_service, path, None)
.await?
{
snix_castore::Node::Directory { digest, size } => (digest, size),
snix_castore::Node::File {
digest,
size,
executable: _,
} => (digest, size),
snix_castore::Node::Symlink { target: _ } => {
return Err(anyhow!("TODO: Figure out what to do with symlink roots"))
}
},
Ingestable::Url(url) => {
let res = match client.get(url.clone()).send().await {
Ok(res) => res.error_for_status()?,
Err(e) => {
return Err(anyhow!("Failed to GET {}: {}", url, e));
}
};
let r = tokio_util::io::StreamReader::new(
res.bytes_stream().map_err(std::io::Error::other),
);
Box::pin(r)
}
}
};
let mut w = blob_service.open_write().await;
let n_bytes = match tokio::io::copy(&mut r, &mut w).await {
Ok(n) => n,
Err(e) => {
return Err(anyhow!(
"tokio::io::copy failed for uri={} with {}",
uri_s,
e
));
Ingestable::Url(url) => {
let res = client
.get(url.clone())
.send()
.await
.context(format!("Request.send failed early for {:?}", uri))?
.error_for_status()?;
let mut r = tokio_util::io::StreamReader::new(
res.bytes_stream().map_err(std::io::Error::other),
);
let mut w = blob_service.open_write().await;
let n_bytes = match tokio::io::copy(&mut r, &mut w).await {
Ok(n) => n,
Err(e) => {
return Err(anyhow!(
"tokio::io::copy failed for uri={} with {}",
uri_s,
e
));
}
};
let digest = w.close().await?;
(digest, n_bytes)
}
};
let digest = w.close().await?;
let digest64 = format!("{}", digest);
add_blob.execute(params![digest64, n_bytes,])?;
add_uri.execute(params![uri_s])?;
@ -229,7 +234,7 @@ async fn main() {
for s in samples {
match s {
Err(e) => {
println!("Failed to fetch ...: {}", e);
println!("Failed to fetch: {}", e);
}
Ok(None) => {}
Ok(Some(ingested)) => {