Multiple improvements

parent 3af97c71f0
commit 2b18f7b5e3

Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "dr"
 description = "Command-line data file processing in Rust"
-version = "0.5.1"
+version = "0.5.2"
 edition = "2021"
 include = [
     "**/*.rs",
@@ -14,5 +14,7 @@ repository = "https://git.guillemborrell.es/guillem/dr"

 [dependencies]
 clap = {version = "4.0", features = ["cargo"]}
-polars-lazy = {"version" = "0.25"}
-polars = {"version" = "0.25", features = ["sql", "parquet", "decompress", "ipc", "ipc_streaming", "docs-selection"]}
+polars-lazy = {"version" = "0.26", "features" = ["parquet", "ipc", "csv-file"]}
+polars-core = {"version" = "0.26", "features" = ["describe"]}
+polars-io = {"version" = "0.26", "features" = ["ipc_streaming"]}
+polars-sql = {"version" = "0.2.2"}
src/io.rs
@@ -1,7 +1,8 @@
-use polars::prelude::*;
-use std::fs;
+use polars_io::prelude::*;
+use polars_lazy::prelude::*;
 use std::io;
 use std::io::Read;
+use std::path::PathBuf;

 /// Read CSV file
 pub fn read_csv(path: String) -> LazyFrame {
@@ -51,36 +52,28 @@ pub fn write_ipc(df: LazyFrame) {
 }

 /// Take a Polars Dataframe and write it as CSV to stdout
-pub fn dump_csv_to_stdout(df: &mut DataFrame) {
-    let _res: () = match CsvWriter::new(io::stdout().lock()).finish(df) {
+pub fn dump_csv_to_stdout(ldf: LazyFrame) {
+    let _res: () = match CsvWriter::new(io::stdout().lock())
+        .finish(&mut ldf.collect().expect("Could not collect"))
+    {
         Ok(_ok) => (),
         Err(_e) => (),
     };
 }

 /// Write a Polars DataFrame to Parquet
-pub fn write_parquet(
-    mut df: DataFrame,
-    path: String,
-    compression: String,
-    statistics: bool,
-    chunksize: Option<usize>,
-) {
+pub fn write_parquet(ldf: LazyFrame, path: String) {
     // Selected compression not implemented yet
-    let mut _file = match fs::File::create(path) {
-        Ok(mut file) => {
-            let mut w = ParquetWriter::new(&mut file);
-            if statistics {
-                w = w.with_statistics(statistics);
-            }
-            if chunksize.unwrap_or(0) > 0 {
-                w = w.with_row_group_size(chunksize);
-            }
-            let _r = match w.finish(&mut df) {
-                Ok(_r) => (),
-                Err(e) => eprintln!("{e}"),
-            };
-        }
-        Err(e) => eprintln!("{e}"),
-    };
+    let mut p = PathBuf::new();
+    p.push(path);
+    ldf.sink_parquet(
+        p,
+        ParquetWriteOptions {
+            compression: ParquetCompression::Snappy,
+            statistics: true,
+            row_group_size: None,
+            data_pagesize_limit: None,
+            maintain_order: false,
+        },
+    )
+    .expect("Could not save");
 }
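The rewritten write_parquet streams a LazyFrame straight to disk through sink_parquet instead of collecting into a DataFrame and driving ParquetWriter by hand. A minimal, self-contained sketch of the same call pattern (assuming polars-core, polars-io and polars-lazy 0.26 with the "parquet" feature as declared above; the example frame and the output path are invented for illustration):

// Sketch only: stream a small in-memory frame to Parquet the way
// write_parquet above does. Data and output path are made up.
use polars_core::df;
use polars_io::prelude::*;
use polars_lazy::prelude::*;
use std::path::PathBuf;

fn main() {
    // Hypothetical example data; any LazyFrame works here.
    let ldf = df!("id" => &[1i64, 2, 3], "name" => &["a", "b", "c"])
        .expect("Could not build frame")
        .lazy();

    let mut p = PathBuf::new();
    p.push("example.parquet"); // assumed output path

    // Same options as write_parquet: Snappy compression, statistics on.
    ldf.sink_parquet(
        p,
        ParquetWriteOptions {
            compression: ParquetCompression::Snappy,
            statistics: true,
            row_group_size: None,
            data_pagesize_limit: None,
            maintain_order: false,
        },
    )
    .expect("Could not save");
}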
src/main.rs
@@ -93,16 +93,10 @@ fn main() {
            println!("{}", df)
        } else {
            if _matches.get_flag("text") {
-                io::dump_csv_to_stdout(&mut ldf.collect().expect("Could not collect"));
+                io::dump_csv_to_stdout(ldf);
            } else {
                if let Some(path) = _matches.get_one::<String>("parquet") {
-                    io::write_parquet(
-                        ldf.collect().expect("Could not collect"),
-                        path.to_string(),
-                        "lz4raw".to_string(),
-                        true,
-                        Some(0),
-                    );
+                    io::write_parquet(ldf, path.to_string());
                } else {
                    io::write_ipc(ldf);
                }
@@ -142,16 +136,10 @@ fn main() {
            println!("{}", df)
        } else {
            if _matches.get_flag("text") {
-                io::dump_csv_to_stdout(&mut ldf.collect().expect("Could not collect"));
+                io::dump_csv_to_stdout(ldf);
            } else {
                if let Some(path) = _matches.get_one::<String>("parquet") {
-                    io::write_parquet(
-                        ldf.collect().expect("Could not collect"),
-                        path.to_string(),
-                        "lz4raw".to_string(),
-                        true,
-                        Some(0),
-                    );
+                    io::write_parquet(ldf, path.to_string());
                } else {
                    io::write_ipc(ldf);
                }
@@ -162,18 +150,12 @@ fn main() {
        }
    } else if let Some(_matches) = matches.subcommand_matches("wpq") {
        if let Some(path) = _matches.get_one::<String>("path") {
-            let df = if _matches.get_flag("text") {
+            let ldf = if _matches.get_flag("text") {
                io::load_csv_from_stdin()
            } else {
                io::read_ipc()
            };
-            io::write_parquet(
-                df.collect().expect("Could not collect"),
-                path.to_string(),
-                "lz4raw".to_string(),
-                true,
-                Some(0),
-            );
+            io::write_parquet(ldf, path.to_string());
        } else {
            eprintln!("Could now write to parquet");
        }
src/sql.rs
@@ -1,15 +1,10 @@
-use polars::sql::SQLContext;
-use polars_lazy::frame::LazyFrame;
+use polars_sql::SQLContext;
+use polars_lazy::prelude::LazyFrame;

 pub fn execute(ldf: LazyFrame, statement: &String) -> LazyFrame {
     let mut context = SQLContext::try_new().expect("Could not create context");
     context.register("this", ldf);
-    match context.execute(statement) {
-        Ok(res) => res,
-        Err(e) => {
-            eprintln!("Query execution error {e}");
-            LazyFrame::default()
-        }
-    }
+    context
+        .execute(statement)
+        .expect("Could not execute statement")
 }
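execute now registers the incoming LazyFrame under the table name "this" and lets any SQL error surface through expect instead of falling back to an empty frame. A small standalone sketch of the same SQLContext flow (assuming polars-sql 0.2.2 alongside polars-core and polars-lazy 0.26 as declared above; the data and the query are invented for illustration):

// Sketch only: register a LazyFrame as "this" and run a query against it.
use polars_core::df;
use polars_lazy::prelude::*;
use polars_sql::SQLContext;

fn main() {
    // Hypothetical example data.
    let ldf = df!("id" => &[1i64, 2, 3], "score" => &[0.5f64, 1.5, 2.5])
        .expect("Could not build frame")
        .lazy();

    // Same pattern as execute() above: one context, one registered table.
    let mut context = SQLContext::try_new().expect("Could not create context");
    context.register("this", ldf);

    let df = context
        .execute("select id, score from this")
        .expect("Could not execute statement")
        .collect()
        .expect("Could not collect");

    println!("{}", df);
}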