Ingestable: use ingest_path for local paths
This way we don't error out when a path is a directory. That said, we're still only including the root in sidx.db (e.g. manifests/ but not manifests/*.json). We should change that next. Also renamed "blob_id" to "blake3_id" because datasette has a special branch for ${column}_id referencing a table that contains ${column}.
This commit is contained in:
parent
65326b2dcb
commit
56a0b346cd
5 changed files with 53 additions and 48 deletions
79
src/main.rs
79
src/main.rs
|
@ -1,16 +1,14 @@
|
|||
use std::{
|
||||
path::{absolute, PathBuf},
|
||||
pin::Pin,
|
||||
};
|
||||
use std::path::{absolute, PathBuf};
|
||||
|
||||
use anyhow::anyhow;
|
||||
use anyhow::Context;
|
||||
use clap::Parser;
|
||||
use futures::{stream, StreamExt, TryStreamExt};
|
||||
use rusqlite::{params, OptionalExtension};
|
||||
use snix_castore::{blobservice, directoryservice};
|
||||
use snix_castore::{blobservice, directoryservice, import::fs::ingest_path};
|
||||
use url::Url;
|
||||
|
||||
#[derive(Clone)]
|
||||
#[derive(Clone, Debug)]
|
||||
enum Ingestable {
|
||||
Url(Url),
|
||||
Path(PathBuf),
|
||||
|
@ -143,7 +141,7 @@ async fn main() {
|
|||
let samples = stream::iter(args.inputs.iter().map(|uri| {
|
||||
let client = &client;
|
||||
let blob_service = &blob_service;
|
||||
let _dir_service = &dir_service;
|
||||
let dir_service = &dir_service;
|
||||
let con = &con;
|
||||
let mut find_sample = con
|
||||
.prepare(include_str!("q/latest-download.sql"))
|
||||
|
@ -174,40 +172,47 @@ async fn main() {
|
|||
}));
|
||||
}
|
||||
}
|
||||
let mut r: Pin<Box<dyn tokio::io::AsyncRead>> = {
|
||||
match uri {
|
||||
Ingestable::Path(path) => match tokio::fs::File::open(path).await {
|
||||
Ok(f) => Box::pin(f),
|
||||
Err(e) => {
|
||||
return Err(anyhow!("Failed to read {:?}: {}", path, e));
|
||||
let (digest, n_bytes) = match uri {
|
||||
Ingestable::Path(path) => {
|
||||
match ingest_path::<_, _, _, &[u8]>(&blob_service, &dir_service, path, None)
|
||||
.await?
|
||||
{
|
||||
snix_castore::Node::Directory { digest, size } => (digest, size),
|
||||
snix_castore::Node::File {
|
||||
digest,
|
||||
size,
|
||||
executable: _,
|
||||
} => (digest, size),
|
||||
snix_castore::Node::Symlink { target: _ } => {
|
||||
return Err(anyhow!("TODO: Figure out what to do with symlink roots"))
|
||||
}
|
||||
},
|
||||
Ingestable::Url(url) => {
|
||||
let res = match client.get(url.clone()).send().await {
|
||||
Ok(res) => res.error_for_status()?,
|
||||
Err(e) => {
|
||||
return Err(anyhow!("Failed to GET {}: {}", url, e));
|
||||
}
|
||||
};
|
||||
let r = tokio_util::io::StreamReader::new(
|
||||
res.bytes_stream().map_err(std::io::Error::other),
|
||||
);
|
||||
Box::pin(r)
|
||||
}
|
||||
}
|
||||
};
|
||||
let mut w = blob_service.open_write().await;
|
||||
let n_bytes = match tokio::io::copy(&mut r, &mut w).await {
|
||||
Ok(n) => n,
|
||||
Err(e) => {
|
||||
return Err(anyhow!(
|
||||
"tokio::io::copy failed for uri={} with {}",
|
||||
uri_s,
|
||||
e
|
||||
));
|
||||
Ingestable::Url(url) => {
|
||||
let res = client
|
||||
.get(url.clone())
|
||||
.send()
|
||||
.await
|
||||
.context(format!("Request.send failed early for {:?}", uri))?
|
||||
.error_for_status()?;
|
||||
let mut r = tokio_util::io::StreamReader::new(
|
||||
res.bytes_stream().map_err(std::io::Error::other),
|
||||
);
|
||||
let mut w = blob_service.open_write().await;
|
||||
let n_bytes = match tokio::io::copy(&mut r, &mut w).await {
|
||||
Ok(n) => n,
|
||||
Err(e) => {
|
||||
return Err(anyhow!(
|
||||
"tokio::io::copy failed for uri={} with {}",
|
||||
uri_s,
|
||||
e
|
||||
));
|
||||
}
|
||||
};
|
||||
let digest = w.close().await?;
|
||||
(digest, n_bytes)
|
||||
}
|
||||
};
|
||||
let digest = w.close().await?;
|
||||
let digest64 = format!("{}", digest);
|
||||
add_blob.execute(params![digest64, n_bytes,])?;
|
||||
add_uri.execute(params![uri_s])?;
|
||||
|
@ -229,7 +234,7 @@ async fn main() {
|
|||
for s in samples {
|
||||
match s {
|
||||
Err(e) => {
|
||||
println!("Failed to fetch ...: {}", e);
|
||||
println!("Failed to fetch: {}", e);
|
||||
}
|
||||
Ok(None) => {}
|
||||
Ok(Some(ingested)) => {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue