Bring back parquet from stdin. Fixes.
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/tag/woodpecker Pipeline was successful

This commit is contained in:
Guillem Borrell 2022-12-26 12:54:17 +00:00
parent 2b18f7b5e3
commit fc063601c5
3 changed files with 51 additions and 24 deletions

View file

@ -1,7 +1,7 @@
[package]
name = "dr"
description = "Command-line data file processing in Rust"
version = "0.5.2"
version = "0.5.3"
edition = "2021"
include = [
"**/*.rs",
@ -15,6 +15,6 @@ repository = "https://git.guillemborrell.es/guillem/dr"
[dependencies]
clap = {version = "4.0", features = ["cargo"]}
polars-lazy = {"version" = "0.26", "features" = ["parquet", "ipc", "csv-file"]}
polars-core = {"version" = "0.26", "features" = ["describe"]}
polars-core = {"version" = "0.26", "features" = ["describe", "fmt"]}
polars-io = {"version" = "0.26", "features" = ["ipc_streaming"]}
polars-sql = {"version" = "0.2.2"}

View file

@ -44,6 +44,20 @@ pub fn load_csv_from_stdin() -> LazyFrame {
}
}
/// Read Parquet data from stdin and return it as a Polars `LazyFrame`.
///
/// All of stdin is read into an in-memory buffer, which is then wrapped in an
/// `io::Cursor` and handed to `ParquetReader`.
///
/// If stdin cannot be read, or the bytes cannot be parsed as Parquet, the
/// error is reported on stderr and an empty `LazyFrame::default()` is
/// returned, so callers always receive a usable frame.
pub fn load_parquet_from_stdin() -> LazyFrame {
    let mut buffer = Vec::new();
    if let Err(e) = io::stdin().lock().read_to_end(&mut buffer) {
        // Report the failure instead of silently carrying on with a partial buffer.
        eprintln!("Could not read from stdin: {}", e);
        return LazyFrame::default();
    }
    match ParquetReader::new(io::Cursor::new(buffer)).finish() {
        Ok(df) => df.lazy(),
        Err(e) => {
            // Keep the empty-frame fallback, but tell the user why it is empty.
            eprintln!("Could not read parquet data from stdin: {}", e);
            LazyFrame::default()
        }
    }
}
/// Write to IPC stream
pub fn write_ipc(df: LazyFrame) {
IpcStreamWriter::new(io::stdout().lock())
@ -75,5 +89,6 @@ pub fn write_parquet(ldf: LazyFrame, path: String) {
data_pagesize_limit: None,
maintain_order: false,
},
).expect("Could not save");
)
.expect("Could not save");
}

View file

@ -1,6 +1,7 @@
mod io;
mod sql;
use clap::{arg, command, ArgAction, Command};
use polars_lazy::prelude::*;
fn main() {
let matches = command!()
@ -55,12 +56,20 @@ fn main() {
.required(false)
.action(ArgAction::SetTrue),
)
.arg(
arg!(-i --stdin ... "Read from stdin instead than from a file")
.required(false)
.action(ArgAction::SetTrue),
)
.arg(
arg!(-t --text ... "Output text instead of binary")
.required(false)
.action(ArgAction::SetTrue),
)
.arg(arg!(-P --text <String> "Write the result as a parquet file").required(false))
.arg(
arg!(-P --parquet <String> "Write the result as a parquet file")
.required(false),
)
.arg(
arg!(-a --head ... "Print the header of the table")
.required(false)
@ -123,30 +132,33 @@ fn main() {
};
println!("{}", df.collect().expect("Could not collect"));
} else if let Some(_matches) = matches.subcommand_matches("rpq") {
if let Some(path) = _matches.get_one::<String>("path") {
let mut ldf = io::read_parquet(path.to_string());
if let Some(query) = _matches.get_one::<String>("query") {
ldf = sql::execute(ldf, query);
}
if _matches.get_flag("summary") {
let df = ldf.collect().expect("Could not collect");
println!("{}", df.describe(None));
} else if _matches.get_flag("head") {
let df = ldf.fetch(5).expect("Could not fetch");
println!("{}", df)
let mut ldf = LazyFrame::default();
if _matches.get_flag("stdin") {
ldf = io::load_parquet_from_stdin();
} else if let Some(path) = _matches.get_one::<String>("path") {
ldf = io::read_parquet(path.to_string());
} else {
eprintln!("File not found or not reading from stdin")
}
if let Some(query) = _matches.get_one::<String>("query") {
ldf = sql::execute(ldf, query);
}
if _matches.get_flag("summary") {
let df = ldf.collect().expect("Could not collect");
println!("{}", df.describe(None));
} else if _matches.get_flag("head") {
let df = ldf.fetch(5).expect("Could not fetch");
println!("{}", df)
} else {
if _matches.get_flag("text") {
io::dump_csv_to_stdout(ldf);
} else {
if _matches.get_flag("text") {
io::dump_csv_to_stdout(ldf);
if let Some(path) = _matches.get_one::<String>("parquet") {
io::write_parquet(ldf, path.to_string());
} else {
if let Some(path) = _matches.get_one::<String>("parquet") {
io::write_parquet(ldf, path.to_string());
} else {
io::write_ipc(ldf);
}
io::write_ipc(ldf);
}
}
} else {
eprintln!("File not found")
}
} else if let Some(_matches) = matches.subcommand_matches("wpq") {
if let Some(path) = _matches.get_one::<String>("path") {