This commit is contained in:
		
							parent
							
								
									61aabf53b8
								
							
						
					
					
						commit
						6292fb2609
					
				
							
								
								
									
										5
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										5
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							|  | @ -19,3 +19,8 @@ Cargo.lock | |||
| # Added by cargo | ||||
| 
 | ||||
| /target | ||||
|   | ||||
| .vscode | ||||
| .ipynb_checkpoints | ||||
| 
 | ||||
| /data | ||||
|  |  | |||
							
								
								
									
										5
									
								
								.woodpecker.yml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								.woodpecker.yml
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,5 @@ | |||
| pipeline: | ||||
|   build: | ||||
|     image: rust:1-buster | ||||
|     commands: | ||||
|       - cargo install --path . | ||||
|  | @ -1,7 +1,7 @@ | |||
| [package] | ||||
| name = "dr" | ||||
| description = "Command-line data file processing in Rust" | ||||
| version = "0.1.0" | ||||
| version = "0.2.0" | ||||
| edition = "2021" | ||||
| include = [ | ||||
|     "**/*.rs", | ||||
|  |  | |||
|  | @ -1,6 +1,6 @@ | |||
| # dr.rs | ||||
| 
 | ||||
| A set of data files (mostly csv and parquet) processing utilities inspired by [csvkit](https://github.com/wireservice/csvkit) with blazing speed, powered by Rust. | ||||
| A command-line toolkit for processing data files (csv and parquet), inspired by [csvkit](https://github.com/wireservice/csvkit), blazingly fast and powered by Rust. | ||||
| 
 | ||||
| You may wonder why I'm implementing this, since there's already [xsv](https://github.com/BurntSushi/xsv). There are two reasons for that: | ||||
| 
 | ||||
|  | @ -37,6 +37,10 @@ shape: (3, 2) | |||
| └──────┴───────────┘ | ||||
| ``` | ||||
| 
 | ||||
| ## Performance | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| ## Built standing on the shoulders of giants | ||||
| 
 | ||||
| None of this would be possible without [Polars](https://github.com/pola-rs/polars) | ||||
							
								
								
									
										7
									
								
								python/group.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										7
									
								
								python/group.py
									
									
									
									
									
										Executable file
									
								
							|  | @ -0,0 +1,7 @@ | |||
| #!/usr/bin/env python3 | ||||
| 
 | ||||
| import sys | ||||
| import pandas as pd | ||||
| 
 | ||||
| df = pd.read_csv(sys.stdin) | ||||
| print(df.groupby("Dept", as_index=False).Weekly_Sales.mean()) | ||||
							
								
								
									
										8
									
								
								queries/weekly_sales_by_dept.sql
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								queries/weekly_sales_by_dept.sql
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,8 @@ | |||
| select  | ||||
|     Dept,  | ||||
|     avg(Weekly_Sales)  | ||||
| from  | ||||
|     this  | ||||
| group by  | ||||
|     Dept | ||||
|      | ||||
							
								
								
									
										14
									
								
								src/io.rs
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								src/io.rs
									
									
									
									
									
								
							|  | @ -1,5 +1,6 @@ | |||
| use polars::frame::DataFrame; | ||||
| use polars::prelude::*; | ||||
| use std::fs; | ||||
| use std::io; | ||||
| use std::io::Read; | ||||
| 
 | ||||
|  | @ -26,15 +27,10 @@ pub fn dump_csv_to_stdout(df: &mut DataFrame) { | |||
|     }; | ||||
| } | ||||
| 
 | ||||
| /// Read parquet format from stdin and return a Polars DataFrame
 | ||||
| pub fn load_parquet_from_stdin() -> DataFrame { | ||||
|     let mut buffer: String = String::new(); | ||||
|     let _res: () = match io::stdin().read_to_string(&mut buffer) { | ||||
|         Ok(_ok) => (), | ||||
|         Err(_e) => (), | ||||
|     }; | ||||
|     let cursor = io::Cursor::new(buffer.as_bytes()); | ||||
|     let df = match ParquetReader::new(cursor).finish() { | ||||
| /// Read the parquet file at `path` and return it as a Polars DataFrame
 | ||||
| pub fn read_parquet(path: String) -> DataFrame { | ||||
|     let file = fs::File::open(path).expect("Could not open file"); | ||||
|     let df = match ParquetReader::new(file).finish() { | ||||
|         Ok(df) => df, | ||||
|         Err(e) => { | ||||
|             eprintln!("{e}"); | ||||
|  |  | |||
							
								
								
									
										22
									
								
								src/main.rs
									
									
									
									
									
								
							
							
						
						
									
										22
									
								
								src/main.rs
									
									
									
									
									
								
							|  | @ -11,7 +11,11 @@ fn main() { | |||
|                 .arg(arg!(-d --delimiter <String> "Column delimiter").required(false)), | ||||
|         ) | ||||
|         .subcommand(Command::new("print").about("Pretty prints the table")) | ||||
|         .subcommand(Command::new("rpq").about("Read parquet file")) | ||||
|         .subcommand( | ||||
|             Command::new("rpq") | ||||
|                 .about("Read parquet file") | ||||
|                 .arg(arg!([path] "Path to the parquet file")), | ||||
|         ) | ||||
|         .get_matches(); | ||||
| 
 | ||||
|     if let Some(matches) = matches.subcommand_matches("sql") { | ||||
|  | @ -26,15 +30,17 @@ fn main() { | |||
|             let mut df = io::load_csv_from_stdin(); | ||||
|             io::dump_csv_to_stdout(&mut df); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if let Some(_matches) = matches.subcommand_matches("print") { | ||||
|     } else if let Some(_matches) = matches.subcommand_matches("print") { | ||||
|         let df = io::load_csv_from_stdin(); | ||||
|         println!("{}", df) | ||||
|     } | ||||
| 
 | ||||
|     if let Some(_matches) = matches.subcommand_matches("rpq") { | ||||
|         let mut df = io::load_parquet_from_stdin(); | ||||
|     } else if let Some(matches) = matches.subcommand_matches("rpq") { | ||||
|         if let Some(path) = matches.get_one::<String>("path") { | ||||
|             let mut df = io::read_parquet(path.to_string()); | ||||
|             io::dump_csv_to_stdout(&mut df); | ||||
|         } else { | ||||
|             eprintln!("File not found") | ||||
|         } | ||||
|     } else { | ||||
|         println!("No command provided. Please execute dr --help") | ||||
|     } | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue