This commit is contained in:
		
							parent
							
								
									61aabf53b8
								
							
						
					
					
						commit
						6292fb2609
					
				
							
								
								
									
										5
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										5
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							|  | @ -19,3 +19,8 @@ Cargo.lock | ||||||
| # Added by cargo | # Added by cargo | ||||||
| 
 | 
 | ||||||
| /target | /target | ||||||
|  |   | ||||||
|  | .vscode | ||||||
|  | .ipynb_checkpoints | ||||||
|  | 
 | ||||||
|  | /data | ||||||
|  |  | ||||||
							
								
								
									
										5
									
								
								.woodpecker.yml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								.woodpecker.yml
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,5 @@ | ||||||
|  | pipeline: | ||||||
|  |   build: | ||||||
|  |     image: rust:1-buster | ||||||
|  |     commands: | ||||||
|  |       - cargo install --path . | ||||||
|  | @ -1,7 +1,7 @@ | ||||||
| [package] | [package] | ||||||
| name = "dr" | name = "dr" | ||||||
| description = "Command-line data file processing in Rust" | description = "Command-line data file processing in Rust" | ||||||
| version = "0.1.0" | version = "0.2.0" | ||||||
| edition = "2021" | edition = "2021" | ||||||
| include = [ | include = [ | ||||||
|     "**/*.rs", |     "**/*.rs", | ||||||
|  |  | ||||||
|  | @ -1,6 +1,6 @@ | ||||||
| # dr.rs | # dr.rs | ||||||
| 
 | 
 | ||||||
| A set of data files (mostly csv and parquet) processing utilities inspired by [csvkit](https://github.com/wireservice/csvkit) with blazing speed, powered by Rust. | A toolkit to process data files (csv and parquet) using the command line, inspired by [csvkit](https://github.com/wireservice/csvkit), with blazing speed, and powered by Rust. | ||||||
| 
 | 
 | ||||||
| You may wonder why I'm implementing this, since there's already [xsv](https://github.com/BurntSushi/xsv). There are two reasons for that: | You may wonder why I'm implementing this, since there's already [xsv](https://github.com/BurntSushi/xsv). There are two reasons for that: | ||||||
| 
 | 
 | ||||||
|  | @ -37,6 +37,10 @@ shape: (3, 2) | ||||||
| └──────┴───────────┘ | └──────┴───────────┘ | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
|  | ## Performance | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| ## Built standing on the shoulders of giants | ## Built standing on the shoulders of giants | ||||||
| 
 | 
 | ||||||
| None of this would be possible without [Polars](https://github.com/pola-rs/polars) | None of this would be possible without [Polars](https://github.com/pola-rs/polars) | ||||||
							
								
								
									
										7
									
								
								python/group.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										7
									
								
								python/group.py
									
									
									
									
									
										Executable file
									
								
							|  | @ -0,0 +1,7 @@ | ||||||
|  | #!/usr/bin/env python3 | ||||||
|  | 
 | ||||||
|  | import sys | ||||||
|  | import pandas as pd | ||||||
|  | 
 | ||||||
|  | df = pd.read_csv(sys.stdin) | ||||||
|  | print(df.groupby("Dept", as_index=False).Weekly_Sales.mean()) | ||||||
							
								
								
									
										8
									
								
								queries/weekly_sales_by_dept.sql
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								queries/weekly_sales_by_dept.sql
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,8 @@ | ||||||
|  | select  | ||||||
|  |     Dept,  | ||||||
|  |     avg(Weekly_Sales)  | ||||||
|  | from  | ||||||
|  |     this  | ||||||
|  | group by  | ||||||
|  |     Dept | ||||||
|  |      | ||||||
							
								
								
									
										14
									
								
								src/io.rs
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								src/io.rs
									
									
									
									
									
								
							|  | @ -1,5 +1,6 @@ | ||||||
| use polars::frame::DataFrame; | use polars::frame::DataFrame; | ||||||
| use polars::prelude::*; | use polars::prelude::*; | ||||||
|  | use std::fs; | ||||||
| use std::io; | use std::io; | ||||||
| use std::io::Read; | use std::io::Read; | ||||||
| 
 | 
 | ||||||
|  | @ -26,15 +27,10 @@ pub fn dump_csv_to_stdout(df: &mut DataFrame) { | ||||||
|     }; |     }; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /// Read parquet format from stdin and return a Polars DataFrame
 | /// Read parquet and return a Polars DataFrame
 | ||||||
| pub fn load_parquet_from_stdin() -> DataFrame { | pub fn read_parquet(path: String) -> DataFrame { | ||||||
|     let mut buffer: String = String::new(); |     let file = fs::File::open(path).expect("Could not open file"); | ||||||
|     let _res: () = match io::stdin().read_to_string(&mut buffer) { |     let df = match ParquetReader::new(file).finish() { | ||||||
|         Ok(_ok) => (), |  | ||||||
|         Err(_e) => (), |  | ||||||
|     }; |  | ||||||
|     let cursor = io::Cursor::new(buffer.as_bytes()); |  | ||||||
|     let df = match ParquetReader::new(cursor).finish() { |  | ||||||
|         Ok(df) => df, |         Ok(df) => df, | ||||||
|         Err(e) => { |         Err(e) => { | ||||||
|             eprintln!("{e}"); |             eprintln!("{e}"); | ||||||
|  |  | ||||||
							
								
								
									
										24
									
								
								src/main.rs
									
									
									
									
									
								
							
							
						
						
									
										24
									
								
								src/main.rs
									
									
									
									
									
								
							|  | @ -11,7 +11,11 @@ fn main() { | ||||||
|                 .arg(arg!(-d --delimiter <String> "Column delimiter").required(false)), |                 .arg(arg!(-d --delimiter <String> "Column delimiter").required(false)), | ||||||
|         ) |         ) | ||||||
|         .subcommand(Command::new("print").about("Pretty prints the table")) |         .subcommand(Command::new("print").about("Pretty prints the table")) | ||||||
|         .subcommand(Command::new("rpq").about("Read parquet file")) |         .subcommand( | ||||||
|  |             Command::new("rpq") | ||||||
|  |                 .about("Read parquet file") | ||||||
|  |                 .arg(arg!([path] "Path to the parquet file")), | ||||||
|  |         ) | ||||||
|         .get_matches(); |         .get_matches(); | ||||||
| 
 | 
 | ||||||
|     if let Some(matches) = matches.subcommand_matches("sql") { |     if let Some(matches) = matches.subcommand_matches("sql") { | ||||||
|  | @ -26,15 +30,17 @@ fn main() { | ||||||
|             let mut df = io::load_csv_from_stdin(); |             let mut df = io::load_csv_from_stdin(); | ||||||
|             io::dump_csv_to_stdout(&mut df); |             io::dump_csv_to_stdout(&mut df); | ||||||
|         } |         } | ||||||
|     } |     } else if let Some(_matches) = matches.subcommand_matches("print") { | ||||||
| 
 |  | ||||||
|     if let Some(_matches) = matches.subcommand_matches("print") { |  | ||||||
|         let df = io::load_csv_from_stdin(); |         let df = io::load_csv_from_stdin(); | ||||||
|         println!("{}", df) |         println!("{}", df) | ||||||
|     } |     } else if let Some(matches) = matches.subcommand_matches("rpq") { | ||||||
| 
 |         if let Some(path) = matches.get_one::<String>("path") { | ||||||
|     if let Some(_matches) = matches.subcommand_matches("rpq") { |             let mut df = io::read_parquet(path.to_string()); | ||||||
|         let mut df = io::load_parquet_from_stdin(); |             io::dump_csv_to_stdout(&mut df); | ||||||
|         io::dump_csv_to_stdout(&mut df); |         } else { | ||||||
|  |             eprintln!("File not found") | ||||||
|  |         } | ||||||
|  |     } else { | ||||||
|  |         println!("No command provided. Please execute dr --help") | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue