Compare commits

...

2 commits

5 changed files with 309 additions and 88 deletions

View file

@@ -88,7 +88,7 @@ lib.makeScope pkgs.newScope (
datasette-settings = self.callPackage (
{ formats }:
(formats.json { }).generate "datasette-settings.json" {
sql_time_limit_ms = 8000;
sql_time_limit_ms = 16000;
}
) { };
datasette-metadata = self.callPackage (
@@ -122,38 +122,18 @@ lib.makeScope pkgs.newScope (
<code>CudaArtifact</code>s (identified by <code>sha256</code>)
claiming the same <code>(pname, version, platform)</code> triple
'';
sql = ''
SELECT
COUNT(DISTINCT sha256) AS conflicts,
pname.str AS pname,
ver.str AS ver,
plat.str AS plat,
GROUP_CONCAT(name.str, char(10)) AS name,
GROUP_CONCAT(tag.str, char(10)) AS tag,
GROUP_CONCAT(h.hash, char(10)) AS sha256
FROM
(
CudaArtifact AS cc,
Str AS name,
Str AS pname,
Str as ver,
Str as plat,
Hash as h
ON cc.name=name.id
AND cc.pname=pname.id
AND cc.version = ver.id
AND cc.platform = plat.id
AND cc.sha256 = h.id
)
LEFT JOIN Str AS tag
ON
cc.compat_tag=tag.id
GROUP BY
cc.pname, cc.version, cc.platform
HAVING
conflicts >= CAST(:min_conflicts AS INTEGER)
ORDER BY conflicts DESC
sql = builtins.readFile ./src/q/summary-cuda-conflicts.sql;
};
queries.cuda_pnames = {
title = "Known CUDA Artifacts";
description_html = ''
Overview of known CUDA artifacts sorted by <code>pname</code>
'';
sql = builtins.readFile ./src/q/summary-cuda-pnames.sql;
};
queries.cuda_platforms = {
title = "CUDA: Supported Platforms ";
sql = builtins.readFile ./src/q/summary-cuda-platforms.sql;
};
};
};

View file

@@ -186,7 +186,7 @@ async fn open_context(
}
let con = rusqlite::Connection::open(&db_path).expect("Failed to construct Database object");
con.pragma_update(None, "jorunal_mode", "wal").unwrap();
con.pragma_update(None, "journal_mode", "wal").unwrap();
con.pragma_update(None, "synchronous", "normal").unwrap();
con.pragma_update(None, "temp_store", "memory").unwrap();
con.pragma_update(None, "foreign_keys", "on").unwrap();
@@ -238,6 +238,86 @@ where
}
}
/// Minimal connection interface required by the free `db_add_*` helpers, so
/// each helper can accept a plain `Connection`, a `Transaction`, or a
/// `Savepoint` interchangeably.
trait ConnectionLike {
    // Marked dead_code: not currently called through the trait object/bound
    // (the helpers below go through prepared statements instead).
    #[allow(dead_code)]
    fn execute<P: rusqlite::Params>(&self, sql: &str, params: P) -> rusqlite::Result<usize>;
    /// Forward to rusqlite's statement cache; see `Connection::prepare_cached`.
    fn prepare_cached(&self, sql: &str) -> rusqlite::Result<rusqlite::CachedStatement<'_>>;
}
/// Base case: a plain `Connection` satisfies `ConnectionLike` by forwarding
/// straight to its inherent methods.
impl ConnectionLike for rusqlite::Connection {
    fn execute<P: rusqlite::Params>(&self, sql: &str, params: P) -> rusqlite::Result<usize> {
        rusqlite::Connection::execute(self, sql, params)
    }
    fn prepare_cached(&self, sql: &str) -> rusqlite::Result<rusqlite::CachedStatement<'_>> {
        rusqlite::Connection::prepare_cached(self, sql)
    }
}
/// `Transaction` derefs to `Connection`, so forwarding through `**self`
/// reuses the inherent implementations (same coercion the UFCS form relies on).
impl<'a> ConnectionLike for rusqlite::Transaction<'a> {
    fn execute<P: rusqlite::Params>(&self, sql: &str, params: P) -> rusqlite::Result<usize> {
        (**self).execute(sql, params)
    }
    fn prepare_cached(&self, sql: &str) -> rusqlite::Result<rusqlite::CachedStatement<'_>> {
        (**self).prepare_cached(sql)
    }
}
/// `Savepoint` likewise derefs to `Connection`; forward through `**self`.
impl<'a> ConnectionLike for rusqlite::Savepoint<'a> {
    fn execute<P: rusqlite::Params>(&self, sql: &str, params: P) -> rusqlite::Result<usize> {
        (**self).execute(sql, params)
    }
    fn prepare_cached(&self, sql: &str) -> rusqlite::Result<rusqlite::CachedStatement<'_>> {
        (**self).prepare_cached(sql)
    }
}
/// Record one download sample for `uri` inside its own savepoint on `tx`,
/// returning the `(sample_id, epoch)` pair produced by `q/add-sample.sql`.
///
/// If `hash` is present, the blob row is upserted first (size unknown at
/// this point — presumably `q/upsert-blob.sql` preserves any existing
/// n_bytes; TODO confirm) so the sample can reference it.
///
/// NOTE(review): `hash`/`http_code`/`content_type` stay `&Option<_>` rather
/// than the more idiomatic `Option<&_>` to keep the callers' interface
/// unchanged.
fn db_add_sample(
    tx: &mut rusqlite::Transaction<'_>,
    uri: &str,
    hash: &Option<String>,
    http_code: &Option<u16>,
    content_type: &Option<String>,
) -> Result<(u32, u32), Error> {
    let mut sp = tx.savepoint().context("db_add_sample")?;
    // Commit (release) the savepoint when `sp` is dropped; we never call
    // `commit()` explicitly below.
    sp.set_drop_behavior(rusqlite::DropBehavior::Commit);
    if let Some(h) = hash {
        // `&String` deref-coerces to `&str`; the previous `h.clone()` here
        // was a needless allocation.
        db_add_blob(&mut sp, h, None)?;
    }
    let mut add_sample = sp
        .prepare_cached(include_str!("q/add-sample.sql"))
        .context("Failed to prepare add-sample.sql")?;
    // The statement is expected to yield exactly one row convertible to
    // (u32, u32); `add_sample` drops before `sp`, so the savepoint commits
    // after the query completes.
    Ok(add_sample.query_row(
        named_params! {
            ":uri": uri,
            ":hash": hash,
            ":http_code": http_code,
            ":content_type": content_type
        },
        |row| <(u32, u32)>::try_from(row),
    )?)
}
/// Upsert the blob row identified by `hash`, optionally recording its size.
/// Returns the affected-row count reported by `q/upsert-blob.sql`.
fn db_add_blob<Con: ConnectionLike>(
    con: &mut Con,
    hash: &str,
    n_bytes: Option<u64>,
) -> Result<usize, Error> {
    let mut stmt = con
        .prepare_cached(include_str!("q/upsert-blob.sql"))
        .context("Failed to prepare upsert-blob.sql")?;
    let affected = stmt.execute(params![hash, n_bytes])?;
    Ok(affected)
}
/// Upsert a URI row, returning the affected-row count from `q/upsert-uri.sql`.
fn db_add_uri<Con: ConnectionLike>(con: &mut Con, uri: &str) -> Result<usize, Error> {
    let mut stmt = con
        .prepare_cached(include_str!("q/upsert-uri.sql"))
        .context("Failed to prepare upsert-uri.sql")?;
    let affected = stmt.execute(params![uri])?;
    Ok(affected)
}
impl<BS: BlobService + Clone, DS: DirectoryService + Clone> SidxContext<BS, DS> {
async fn latest_sample(&self, uri: &str) -> Result<Option<Sampled>, Error> {
let lock = self.con.lock().unwrap();
@@ -265,42 +345,6 @@ impl<BS: BlobService + Clone, DS: DirectoryService + Clone> SidxContext<BS, DS>
None => Ok(None),
})
}
    /// Insert a sample row for `uri` and return `(sample_id, epoch)`.
    ///
    /// NOTE(review): this is the removed-side of the diff — superseded by the
    /// free `db_add_sample` that runs inside a transaction savepoint. This
    /// version locks `self.con` per call, so the blob/uri/sample inserts are
    /// NOT atomic across the three `db_add_*` calls.
    async fn db_add_sample(
        &self,
        uri: &str,
        hash: &Option<String>,
        http_code: &Option<u16>,
        content_type: &Option<String>,
    ) -> Result<(u32, u32), Error> {
        // Serialize all DB access through the connection mutex.
        let lock = self.con.lock().expect("Locking mutex for db_add_sample");
        let mut add_sample = lock
            .prepare_cached(include_str!("q/add-sample.sql"))
            .context("Failed to prepare add-sample.sql")?;
        // Expect exactly one row, convertible to (u32, u32).
        Ok(add_sample.query_row(
            named_params! {
                ":uri": uri,
                ":hash": hash,
                ":http_code": http_code,
                ":content_type": content_type
            },
            |row| <(u32, u32)>::try_from(row),
        )?)
    }
    /// Upsert a blob row (hash + size); removed-side of the diff, superseded
    /// by the free `db_add_blob` which additionally allows an unknown size.
    async fn db_add_blob(&self, hash: &str, n_bytes: u64) -> Result<usize, Error> {
        let lock = self.con.lock().expect("db_add_blob: couldn't lock mutex?");
        let mut add_blob = lock
            .prepare_cached(include_str!("q/upsert-blob.sql"))
            .context("Failed to prepare upsert-blob.sql")?;
        Ok(add_blob.execute(params![hash, n_bytes,])?)
    }
    /// Upsert a URI row; removed-side of the diff, superseded by the free
    /// `db_add_uri` that runs on any `ConnectionLike`.
    async fn db_add_uri(&self, uri: &str) -> Result<usize, Error> {
        let lock = self.con.lock().unwrap();
        let mut add_uri = lock
            .prepare_cached(include_str!("q/upsert-uri.sql"))
            .context("Failed to prepare upsert-uri.sql")?;
        Ok(add_uri.execute(params![uri])?)
    }
async fn record_ingested_node(
&self,
uri: &str,
@@ -308,17 +352,19 @@ impl<BS: BlobService + Clone, DS: DirectoryService + Clone> SidxContext<BS, DS>
http_code: Option<u16>,
content_type: Option<String>,
) -> Result<Sampled, Error> {
let mut lock = self.con.lock().unwrap();
let mut tx = lock.transaction()?;
{
let digest64 = if let Some(SizedBlob { hash, n_bytes }) = blob {
let digest64 = format!("{}", hash);
self.db_add_blob(&digest64, n_bytes.clone()).await?;
db_add_blob(&mut tx, &digest64, Some(n_bytes.clone()))?;
Some(digest64)
} else {
None
};
self.db_add_uri(&uri).await?;
let (sample_id, epoch) = self
.db_add_sample(&uri, &digest64, &http_code, &content_type)
.await?;
db_add_uri(&mut tx, &uri)?;
let (sample_id, epoch) =
db_add_sample(&mut tx, &uri, &digest64, &http_code, &content_type)?;
Ok(Sampled {
sample_id,
uri: uri.to_string(),
@@ -328,6 +374,7 @@ impl<BS: BlobService + Clone, DS: DirectoryService + Clone> SidxContext<BS, DS>
when: SampledWhen::Now,
})
}
}
async fn download(&self, uri: &Url) -> Result<Sampled, Error> {
let _permit = self.http_semaphore.acquire().await.unwrap();
eprintln!("Downloading {:?}", uri.to_string());

View file

@@ -0,0 +1,34 @@
-- summary-cuda-conflicts.sql
-- Report (pname, ver, plat) triples claimed by more than one distinct
-- sha256 among CudaArtifact rows — i.e. conflicting artifact definitions.
-- The bound parameter :min_conflicts sets the reporting threshold.
--
-- NOTE(review): the parenthesized comma-join with a trailing ON clause is
-- unusual syntax but accepted by SQLite; the ON conditions resolve the
-- interned Str/Hash ids back to their strings/hashes.
SELECT
  COUNT(DISTINCT sha256) AS conflicts,
  pname.str AS pname,
  ver.str AS ver,
  plat.str AS plat,
  -- char(10) = newline separator for the concatenated lists.
  GROUP_CONCAT(name.str, char(10)) AS name,
  -- char(0x274C) is U+274C (cross mark), rendered in place of NULL tags.
  GROUP_CONCAT(IFNULL(tag.str, char(0x274C)), char(10)) AS tag,
  GROUP_CONCAT(h.hash, char(10)) AS sha256
FROM
  (
    CudaArtifact AS cc,
    Str AS name,
    Str AS pname,
    Str as ver,
    Str as plat,
    Hash as h ON cc.name = name.id
    AND cc.pname = pname.id
    AND cc.version = ver.id
    AND cc.platform = plat.id
    AND cc.sha256 = h.id
  )
  -- compat_tag is optional, hence the LEFT JOIN (NULLs handled above).
  LEFT JOIN Str AS tag ON cc.compat_tag = tag.id
GROUP BY
  cc.pname,
  cc.version,
  cc.platform
HAVING
  conflicts >= CAST(:min_conflicts AS INTEGER)
ORDER BY
  conflicts DESC,
  cc.pname,
  cc.version,
  cc.platform,
  -- NOTE(review): tag.str here is an arbitrary per-group value under
  -- GROUP BY; ordering on it is only a weak tie-breaker — confirm intent.
  tag.str

View file

@@ -0,0 +1,73 @@
-- summary-cuda-platforms.sql
-- Group CUDA package names (pnames) by the exact set of platforms they are
-- published for: one output row per distinct platform set.
--
-- Fix: dropped the dead `VersionGroups` CTE that grouped pnames by version
-- set — it was never referenced by the final SELECT (copy-paste residue
-- from a sibling query), so removing it leaves the result unchanged.
WITH PerQuadruple AS (
  -- One row per (pname, version, platform, compat_tag) with the number of
  -- distinct sha256 hashes observed for that quadruple.
  -- char(0x274C) is U+274C (cross mark), displayed in place of NULLs.
  SELECT
    pname.str AS pname,
    IFNULL(ver.str, char(0x274C)) AS version,
    plat.str AS platform,
    IFNULL(ct.str, char(0x274C)) AS tag,
    COUNT(DISTINCT sha256.hash) AS uq_sha256s
  FROM
    (
      CudaArtifact AS ca,
      Str AS pname,
      Str AS plat,
      Hash AS sha256 ON ca.pname = pname.id
      AND ca.platform = plat.id
      AND sha256.id = ca.sha256
    )
    LEFT JOIN Str AS ver ON ver.id = ca.version
    LEFT JOIN Str AS ct ON ca.compat_tag = ct.id
  GROUP BY
    pname.id,
    version,
    platform,
    tag
  -- NOTE(review): ORDER BY inside a CTE is not guaranteed to survive the
  -- outer aggregation; kept for byte-compatibility with the original.
  ORDER BY
    uq_sha256s DESC,
    pname.str,
    version,
    plat.str,
    tag
),
ByPname AS (
  -- Collapse to one row per pname. GROUP_CONCAT(DISTINCT …) only allows the
  -- default ',' separator in SQLite, so newlines are substituted afterwards
  -- via REPLACE.
  SELECT
    pname,
    REPLACE(GROUP_CONCAT(DISTINCT pq.version), ',', char(10)) AS versions,
    REPLACE(
      GROUP_CONCAT(DISTINCT pq.platform),
      ',',
      char(10)
    ) AS platforms,
    REPLACE(GROUP_CONCAT(DISTINCT pq.tag), ',', char(10)) AS tags,
    SUM(pq.uq_sha256s) AS uq_sha256s
  FROM
    PerQuadruple AS pq
  GROUP BY
    pq.pname
),
PlatformGroups AS (
  -- Invert: collect the pnames sharing each (newline-joined) platform set.
  SELECT
    REPLACE(GROUP_CONCAT(DISTINCT pname), ',', char(10)) AS pnames,
    platforms
  FROM
    ByPname AS bp
  GROUP BY
    platforms
  ORDER BY
    LENGTH(platforms) DESC,
    LENGTH(pnames)
)
SELECT
  *
FROM
  PlatformGroups

View file

@@ -0,0 +1,87 @@
-- summary-cuda-pnames.sql
-- Overview of known CUDA artifacts, one row per (pname, version, platform,
-- tag) quadruple, with per-pname newline-joined lists of versions,
-- platforms, and tags plus the summed distinct-sha256 count.
--
-- NOTE(review): the outer query selects FROM PerQuadruple, so each pname
-- appears once per quadruple it occurs in, each repeating the same
-- aggregated columns — confirm that this fan-out is intended.
WITH PerQuadruple AS (
  -- One row per (pname, version, platform, compat_tag) with the number of
  -- distinct sha256 hashes for that quadruple.
  -- char(0x274C) is U+274C (cross mark), displayed in place of NULLs.
  SELECT
    pname.str AS pname,
    IFNULL(ver.str, char(0x274C)) AS version,
    plat.str AS platform,
    IFNULL(ct.str, char(0x274C)) AS tag,
    COUNT(DISTINCT sha256.hash) AS uq_sha256s
  FROM
    (
      CudaArtifact AS ca,
      Str AS pname,
      Str AS plat,
      Hash AS sha256 ON ca.pname = pname.id
      AND ca.platform = plat.id
      AND sha256.id = ca.sha256
    )
    LEFT JOIN Str AS ver ON ver.id = ca.version
    LEFT JOIN Str AS ct ON ca.compat_tag = ct.id
  GROUP BY
    pname.id,
    version,
    platform,
    tag
  ORDER BY
    uq_sha256s DESC,
    pname.str,
    version,
    plat.str,
    tag
)
SELECT
  pname,
  -- Correlated subqueries: the inner bare `pname` is PerQuadruple's own
  -- column, `pq.pname` the outer row's — this filters to the current pname.
  (
    SELECT
      GROUP_CONCAT(x, char(10))
    FROM
      (
        SELECT
          DISTINCT version AS x
        FROM
          PerQuadruple
        WHERE
          pname = pq.pname
        ORDER BY
          x
      )
  ) AS versions,
  (
    SELECT
      GROUP_CONCAT(x, char(10))
    FROM
      (
        SELECT
          DISTINCT platform AS x
        FROM
          PerQuadruple
        WHERE
          pname = pq.pname
        ORDER BY
          x
      )
  ) AS platforms,
  (
    SELECT
      GROUP_CONCAT(x, char(10))
    FROM
      (
        SELECT
          DISTINCT tag AS x
        FROM
          PerQuadruple
        WHERE
          pname = pq.pname
        ORDER BY
          x
      )
  ) AS tags,
  -- Total distinct-hash count across all of this pname's quadruples.
  (
    SELECT
      SUM(uq_sha256s)
    FROM
      PerQuadruple
    WHERE
      pname = pq.pname
  ) AS uq_sha256s
FROM
  PerQuadruple AS pq