Compare commits

..

2 commits

Author SHA1 Message Date
Guillem Borrell 6292fb2609 Added cicd script
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2022-11-20 22:00:58 +00:00
Guillem Borrell 61aabf53b8 Add package metadata 2022-11-20 12:00:03 +00:00
8 changed files with 58 additions and 20 deletions

5
.gitignore vendored
View file

@ -19,3 +19,8 @@ Cargo.lock
# Added by cargo # Added by cargo
/target /target
.vscode
.ipynb_checkpoints
/data

5
.woodpecker.yml Normal file
View file

@ -0,0 +1,5 @@
# Woodpecker CI pipeline with a single "build" step that compiles and
# installs the crate from the repository root inside a Rust container.
pipeline:
  build:
    image: rust:1-buster
    commands:
      # Building via `cargo install` fails the step if the crate does not compile.
      - cargo install --path .

View file

@ -1,7 +1,14 @@
[package] [package]
name = "dr" name = "dr"
version = "0.1.0" description = "Command-line data file processing in Rust"
version = "0.2.0"
edition = "2021" edition = "2021"
include = [
"**/*.rs",
"Cargo.toml",
]
license-file = "LICENSE"
repository = "https://git.guillemborrell.es/guillem/dr"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View file

@ -1,6 +1,6 @@
# dr.rs # dr.rs
A set of data files (mostly csv and parquet) processing utilities inspired by [csvkit](https://github.com/wireservice/csvkit) with blazing speed, powered by Rust. A toolkit to process data files (csv and parquet) using the command line, inspired by [csvkit](https://github.com/wireservice/csvkit), with blazing speed, and powered by Rust.
You may wonder why I'm implementing this, since there's already [xsv](https://github.com/BurntSushi/xsv). There are two reasons for that: You may wonder why I'm implementing this, since there's already [xsv](https://github.com/BurntSushi/xsv). There are two reasons for that:
@ -37,6 +37,10 @@ shape: (3, 2)
└──────┴───────────┘ └──────┴───────────┘
``` ```
## Performance
## Built standing on the shoulders of giants ## Built standing on the shoulders of giants
None of this would be possible without [Polars](https://github.com/pola-rs/polars) None of this would be possible without [Polars](https://github.com/pola-rs/polars)

7
python/group.py Executable file
View file

@ -0,0 +1,7 @@
#!/usr/bin/env python3
"""Print the mean of Weekly_Sales for each Dept of a CSV table read from stdin."""
import sys

import pandas as pd

# Load the complete CSV document from standard input.
sales = pd.read_csv(sys.stdin)

# as_index=False keeps Dept as an ordinary column instead of the index.
by_dept = sales.groupby("Dept", as_index=False)
mean_sales = by_dept["Weekly_Sales"].mean()

print(mean_sales)

View file

@ -0,0 +1,8 @@
-- Average Weekly_Sales per Dept over the table named "this".
SELECT
    Dept,
    avg(Weekly_Sales)
FROM
    this
GROUP BY
    Dept

View file

@ -1,5 +1,6 @@
use polars::frame::DataFrame; use polars::frame::DataFrame;
use polars::prelude::*; use polars::prelude::*;
use std::fs;
use std::io; use std::io;
use std::io::Read; use std::io::Read;
@ -26,15 +27,10 @@ pub fn dump_csv_to_stdout(df: &mut DataFrame) {
}; };
} }
/// Read parquet format from stdin and return a Polars DataFrame /// Read parquet and return a Polars DataFrame
pub fn load_parquet_from_stdin() -> DataFrame { pub fn read_parquet(path: String) -> DataFrame {
let mut buffer: String = String::new(); let file = fs::File::open(path).expect("Could not open file");
let _res: () = match io::stdin().read_to_string(&mut buffer) { let df = match ParquetReader::new(file).finish() {
Ok(_ok) => (),
Err(_e) => (),
};
let cursor = io::Cursor::new(buffer.as_bytes());
let df = match ParquetReader::new(cursor).finish() {
Ok(df) => df, Ok(df) => df,
Err(e) => { Err(e) => {
eprintln!("{e}"); eprintln!("{e}");

View file

@ -11,7 +11,11 @@ fn main() {
.arg(arg!(-d --delimiter <String> "Column delimiter").required(false)), .arg(arg!(-d --delimiter <String> "Column delimiter").required(false)),
) )
.subcommand(Command::new("print").about("Pretty prints the table")) .subcommand(Command::new("print").about("Pretty prints the table"))
.subcommand(Command::new("rpq").about("Read parquet file")) .subcommand(
Command::new("rpq")
.about("Read parquet file")
.arg(arg!([path] "Path to the parquet file")),
)
.get_matches(); .get_matches();
if let Some(matches) = matches.subcommand_matches("sql") { if let Some(matches) = matches.subcommand_matches("sql") {
@ -26,15 +30,17 @@ fn main() {
let mut df = io::load_csv_from_stdin(); let mut df = io::load_csv_from_stdin();
io::dump_csv_to_stdout(&mut df); io::dump_csv_to_stdout(&mut df);
} }
} } else if let Some(_matches) = matches.subcommand_matches("print") {
if let Some(_matches) = matches.subcommand_matches("print") {
let df = io::load_csv_from_stdin(); let df = io::load_csv_from_stdin();
println!("{}", df) println!("{}", df)
} } else if let Some(matches) = matches.subcommand_matches("rpq") {
if let Some(path) = matches.get_one::<String>("path") {
if let Some(_matches) = matches.subcommand_matches("rpq") { let mut df = io::read_parquet(path.to_string());
let mut df = io::load_parquet_from_stdin();
io::dump_csv_to_stdout(&mut df); io::dump_csv_to_stdout(&mut df);
} else {
eprintln!("File not found")
}
} else {
println!("No command provided. Please execute dr --help")
} }
} }