This commit is contained in:
parent
61aabf53b8
commit
6292fb2609
5
.gitignore
vendored
5
.gitignore
vendored
|
@ -19,3 +19,8 @@ Cargo.lock
|
||||||
# Added by cargo
|
# Added by cargo
|
||||||
|
|
||||||
/target
|
/target
|
||||||
|
|
||||||
|
.vscode
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
/data
|
||||||
|
|
5
.woodpecker.yml
Normal file
5
.woodpecker.yml
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
pipeline:
|
||||||
|
build:
|
||||||
|
image: rust:1-buster
|
||||||
|
commands:
|
||||||
|
- cargo install --path .
|
|
@ -1,7 +1,7 @@
|
||||||
[package]
|
[package]
|
||||||
name = "dr"
|
name = "dr"
|
||||||
description = "Command-line data file processing in Rust"
|
description = "Command-line data file processing in Rust"
|
||||||
version = "0.1.0"
|
version = "0.2.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
include = [
|
include = [
|
||||||
"**/*.rs",
|
"**/*.rs",
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# dr.rs
|
# dr.rs
|
||||||
|
|
||||||
A set of data files (mostly csv and parquet) processing utilities inspired by [csvkit](https://github.com/wireservice/csvkit) with blazing speed, powered by Rust.
|
A command-line toolkit to process data files (csv and parquet) with blazing speed, inspired by [csvkit](https://github.com/wireservice/csvkit) and powered by Rust.
|
||||||
|
|
||||||
You may wonder why I'm implementing this, since there's already [xsv](https://github.com/BurntSushi/xsv). There are two reasons for that:
|
You may wonder why I'm implementing this, since there's already [xsv](https://github.com/BurntSushi/xsv). There are two reasons for that:
|
||||||
|
|
||||||
|
@ -37,6 +37,10 @@ shape: (3, 2)
|
||||||
└──────┴───────────┘
|
└──────┴───────────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Performance
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Built standing on the shoulders of giants
|
## Built standing on the shoulders of giants
|
||||||
|
|
||||||
None of this would be possible without [Polars](https://github.com/pola-rs/polars)
|
None of this would be possible without [Polars](https://github.com/pola-rs/polars)
|
7
python/group.py
Executable file
7
python/group.py
Executable file
|
@ -0,0 +1,7 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
df = pd.read_csv(sys.stdin)
|
||||||
|
print(df.groupby("Dept", as_index=False).Weekly_Sales.mean())
|
8
queries/weekly_sales_by_dept.sql
Normal file
8
queries/weekly_sales_by_dept.sql
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
select
|
||||||
|
Dept,
|
||||||
|
avg(Weekly_Sales)
|
||||||
|
from
|
||||||
|
this
|
||||||
|
group by
|
||||||
|
Dept
|
||||||
|
|
14
src/io.rs
14
src/io.rs
|
@ -1,5 +1,6 @@
|
||||||
use polars::frame::DataFrame;
|
use polars::frame::DataFrame;
|
||||||
use polars::prelude::*;
|
use polars::prelude::*;
|
||||||
|
use std::fs;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::io::Read;
|
use std::io::Read;
|
||||||
|
|
||||||
|
@ -26,15 +27,10 @@ pub fn dump_csv_to_stdout(df: &mut DataFrame) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Read parquet format from stdin and return a Polars DataFrame
|
/// Read the parquet file at `path` and return a Polars DataFrame
|
||||||
pub fn load_parquet_from_stdin() -> DataFrame {
|
pub fn read_parquet(path: String) -> DataFrame {
|
||||||
let mut buffer: String = String::new();
|
let file = fs::File::open(path).expect("Could not open file");
|
||||||
let _res: () = match io::stdin().read_to_string(&mut buffer) {
|
let df = match ParquetReader::new(file).finish() {
|
||||||
Ok(_ok) => (),
|
|
||||||
Err(_e) => (),
|
|
||||||
};
|
|
||||||
let cursor = io::Cursor::new(buffer.as_bytes());
|
|
||||||
let df = match ParquetReader::new(cursor).finish() {
|
|
||||||
Ok(df) => df,
|
Ok(df) => df,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
eprintln!("{e}");
|
eprintln!("{e}");
|
||||||
|
|
22
src/main.rs
22
src/main.rs
|
@ -11,7 +11,11 @@ fn main() {
|
||||||
.arg(arg!(-d --delimiter <String> "Column delimiter").required(false)),
|
.arg(arg!(-d --delimiter <String> "Column delimiter").required(false)),
|
||||||
)
|
)
|
||||||
.subcommand(Command::new("print").about("Pretty prints the table"))
|
.subcommand(Command::new("print").about("Pretty prints the table"))
|
||||||
.subcommand(Command::new("rpq").about("Read parquet file"))
|
.subcommand(
|
||||||
|
Command::new("rpq")
|
||||||
|
.about("Read parquet file")
|
||||||
|
.arg(arg!([path] "Path to the parquet file")),
|
||||||
|
)
|
||||||
.get_matches();
|
.get_matches();
|
||||||
|
|
||||||
if let Some(matches) = matches.subcommand_matches("sql") {
|
if let Some(matches) = matches.subcommand_matches("sql") {
|
||||||
|
@ -26,15 +30,17 @@ fn main() {
|
||||||
let mut df = io::load_csv_from_stdin();
|
let mut df = io::load_csv_from_stdin();
|
||||||
io::dump_csv_to_stdout(&mut df);
|
io::dump_csv_to_stdout(&mut df);
|
||||||
}
|
}
|
||||||
}
|
} else if let Some(_matches) = matches.subcommand_matches("print") {
|
||||||
|
|
||||||
if let Some(_matches) = matches.subcommand_matches("print") {
|
|
||||||
let df = io::load_csv_from_stdin();
|
let df = io::load_csv_from_stdin();
|
||||||
println!("{}", df)
|
println!("{}", df)
|
||||||
}
|
} else if let Some(matches) = matches.subcommand_matches("rpq") {
|
||||||
|
if let Some(path) = matches.get_one::<String>("path") {
|
||||||
if let Some(_matches) = matches.subcommand_matches("rpq") {
|
let mut df = io::read_parquet(path.to_string());
|
||||||
let mut df = io::load_parquet_from_stdin();
|
|
||||||
io::dump_csv_to_stdout(&mut df);
|
io::dump_csv_to_stdout(&mut df);
|
||||||
|
} else {
|
||||||
|
eprintln!("File not found")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
println!("No command provided. Please execute dr --help")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue