Compare commits
No commits in common. "main" and "0.3.3" have entirely different histories.
|
@ -1,38 +1,28 @@
|
|||
pipeline:
|
||||
build:
|
||||
image: rust:1-bullseye
|
||||
image: rust:1-buster
|
||||
when:
|
||||
event: tag
|
||||
commands:
|
||||
- cargo build --release
|
||||
buildlinuxaarch64:
|
||||
image: rust:1-bullseye
|
||||
image: rust:1-buster
|
||||
when:
|
||||
event: tag
|
||||
commands:
|
||||
- export CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc
|
||||
- apt-get update && apt-get install -y gcc-aarch64-linux-gnu
|
||||
- rustup target add aarch64-unknown-linux-gnu
|
||||
- cargo build --release --target aarch64-unknown-linux-gnu
|
||||
buildwinamd64:
|
||||
image: rust:1-bullseye
|
||||
when:
|
||||
event: tag
|
||||
commands:
|
||||
- apt-get update && apt-get install -y gcc-mingw-w64
|
||||
- rustup target add x86_64-pc-windows-gnu
|
||||
- cargo build --release --target x86_64-pc-windows-gnu
|
||||
release:
|
||||
image: rust:1-bullseye
|
||||
image: rust:1-buster
|
||||
when:
|
||||
event: tag
|
||||
secrets: [ gitea_api_key ]
|
||||
commands:
|
||||
- curl --user guillem:$GITEA_API_KEY --upload-file target/release/dr https://git.guillemborrell.es/api/packages/guillem/generic/dr/$CI_COMMIT_TAG-linux-amd64/dr
|
||||
- curl --user guillem:$GITEA_API_KEY --upload-file target/aarch64-unknown-linux-gnu/release/dr https://git.guillemborrell.es/api/packages/guillem/generic/dr/$CI_COMMIT_TAG-linux-aarch64/dr
|
||||
- curl --user guillem:$GITEA_API_KEY --upload-file target/x86_64-pc-windows-gnu/release/dr.exe https://git.guillemborrell.es/api/packages/guillem/generic/dr/$CI_COMMIT_TAG-win-amd64/dr.exe
|
||||
- curl --user guillem:$GITEA_API_KEY --upload-file target/release/dr https://git.guillemborrell.es/api/packages/guillem/generic/dr/$CI_COMMIT_TAG/dr-linux-amd64
|
||||
- curl --user guillem:$GITEA_API_KEY --upload-file target/aarch64-unknown-linux-gnu/release/dr https://git.guillemborrell.es/api/packages/guillem/generic/dr/$CI_COMMIT_TAG/dr-linux-aarch64
|
||||
publish:
|
||||
image: rust:1-bullseye
|
||||
image: rust:1-buster
|
||||
when:
|
||||
event: tag
|
||||
secrets: [ cargo_registry_token ]
|
||||
|
|
11
Cargo.toml
11
Cargo.toml
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
name = "dr"
|
||||
description = "Command-line data file processing in Rust"
|
||||
version = "0.7.0"
|
||||
version = "0.3.1"
|
||||
edition = "2021"
|
||||
include = [
|
||||
"**/*.rs",
|
||||
|
@ -14,8 +14,7 @@ repository = "https://git.guillemborrell.es/guillem/dr"
|
|||
|
||||
[dependencies]
|
||||
clap = {version = "4.0", features = ["cargo"]}
|
||||
polars-lazy = {"version" = "0.27", "features" = ["parquet", "ipc", "csv-file"]}
|
||||
polars-core = {"version" = "0.27", "features" = ["describe", "fmt"]}
|
||||
polars-io = {"version" = "0.27", "features" = ["ipc_streaming"]}
|
||||
polars-sql = {"version" = "0.2.3"}
|
||||
sea-query = {"version" = "0.28"}
|
||||
polars = "0.25"
|
||||
polars-sql = "0.2.1"
|
||||
polars-lazy = "0.25"
|
||||
polars-io = {"version" = "0.25", features = ["parquet"]}
|
||||
|
|
308
README.md
308
README.md
|
@ -1,68 +1,71 @@
|
|||
# dr.rs
|
||||
|
||||
[![status-badge](https://ci.guillemborrell.es/api/badges/guillem/dr/status.svg)](https://ci.guillemborrell.es/guillem/dr) | [Download](https://git.guillemborrell.es/guillem/-/packages/generic/dr) | [Source](https://git.guillemborrell.es/guillem/dr) | [Bugs](https://github.com/guillemborrell/dr)
|
||||
[![status-badge](https://ci.guillemborrell.es/api/badges/guillem/dr/status.svg)](https://ci.guillemborrell.es/guillem/dr) | [Download](https://git.guillemborrell.es/guillem/-/packages/generic/dr)
|
||||
|
||||
A toolkit to process data files (csv and parquet) using the command line, inspired by [csvkit](https://github.com/wireservice/csvkit), with blazing speed, and powered by Rust.
|
||||
|
||||
You may wonder why I'm implementing this, since there's already [xsv](https://github.com/BurntSushi/xsv). There are two reasons for that:
|
||||
|
||||
1. This is what I'm implementing to learn Rust.
|
||||
1. This is what I'm implementing to learn Rust
|
||||
2. The Rust data ecosystem has evolved immensely since xsv was started. Now we can add things like SQL commands to filter csv files, or translate results to parquet files.
|
||||
|
||||
## Example
|
||||
|
||||
## TL;DR
|
||||
```bash
|
||||
$ head wine.csv
|
||||
Wine,Alcohol,Malic.acid,Ash,Acl,Mg,Phenols,Flavanoids,Nonflavanoid.phenols,Proanth,Color.int,Hue,OD,Proline
|
||||
1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065
|
||||
1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050
|
||||
1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185
|
||||
1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480
|
||||
1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735
|
||||
1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450
|
||||
1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290
|
||||
1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295
|
||||
1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045
|
||||
|
||||
You can install dr the rust way with `cargo install dr` but downloading a binary from [here](https://git.guillemborrell.es/guillem/-/packages/generic/dr) may be all you need.
|
||||
|
||||
```
|
||||
$ dr --help
|
||||
dr is a handy command line tool to handle csv and parquet files.
|
||||
It is designed to integrate nicely with other command line tools
|
||||
like cat, sed, awk and database clients cli. You can find more
|
||||
information an a short tutorial https://git.guillemborrell.es/guillem/dr
|
||||
|
||||
|
||||
Usage: dr [COMMAND]
|
||||
|
||||
Commands:
|
||||
csv
|
||||
Read csv, output arrow stream
|
||||
schema
|
||||
Several table schema related utilities
|
||||
sql
|
||||
Runs a sql statement on the file
|
||||
print
|
||||
Pretty prints the table
|
||||
rpq
|
||||
Read parquet file
|
||||
wpq
|
||||
Write to a paquet file
|
||||
help
|
||||
Print this message or the help of the given subcommand(s)
|
||||
|
||||
Options:
|
||||
-h, --help
|
||||
Print help information (use `-h` for a summary)
|
||||
|
||||
-V, --version
|
||||
Print version information
|
||||
$ cat wine.csv | dr sql "select Wine, avg(Alcohol) from this group by Wine" | dr print
|
||||
shape: (3, 2)
|
||||
┌──────┬───────────┐
|
||||
│ Wine ┆ Alcohol │
|
||||
│ --- ┆ --- │
|
||||
│ i64 ┆ f64 │
|
||||
╞══════╪═══════════╡
|
||||
│ 3 ┆ 13.15375 │
|
||||
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ 1 ┆ 13.744746 │
|
||||
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ 2 ┆ 12.278732 │
|
||||
└──────┴───────────┘
|
||||
```
|
||||
|
||||
## Howto
|
||||
|
||||
`dr` is a convenience command to explore, transform, and analyze csv and parquet files to save you from writing throwaway python scripts or creating a custom container image for very simple tasks. It's designed to make the life of a data engineer a little easier.
|
||||
The `dr` command offers a set of subcommands, each one of them with a different functionality. You can get the available subcommands with:
|
||||
|
||||
Assume you have a very large csv file, and you just want to translate it to parquet with some type inference and sane defaults. With `dr` this is as easy as:
|
||||
```bash
|
||||
$ dr --help
|
||||
Command-line data file processing in Rust
|
||||
|
||||
```
|
||||
$ dr csv wine.csv -P wine.pq
|
||||
Usage: dr [COMMAND]
|
||||
|
||||
Commands:
|
||||
sql Runs a sql statement on the file
|
||||
print Pretty prints the table
|
||||
rpq Read parquet file
|
||||
wpq Write to a parquet file
|
||||
help Print this message or the help of the given subcommand(s)
|
||||
|
||||
Options:
|
||||
-h, --help Print help information
|
||||
-V, --version Print version information
|
||||
```
|
||||
|
||||
Parquet files are binary, and you may want to check that you've not written nonsense by printing the header on your terminal.
|
||||
Subcommands can be pipelined unless they read from a file, write to a file, or pretty print data. What goes through the pipeline is plain-text comma separated values with a header. While this may not be the best choice in terms of performance, it allows `dr` subcommands to be combined with the usual unix-style command-line tools like `cat`, `head`, `grep`, `awk` and `sed`:
|
||||
|
||||
```
|
||||
$ dr rpq wine.pq -a
|
||||
shape: (5, 14)
|
||||
```bash
|
||||
$ cat wine.csv | head -n 5 | dr print
|
||||
shape: (4, 14)
|
||||
┌──────┬─────────┬────────────┬──────┬─────┬───────────┬──────┬──────┬─────────┐
|
||||
│ Wine ┆ Alcohol ┆ Malic.acid ┆ Ash ┆ ... ┆ Color.int ┆ Hue ┆ OD ┆ Proline │
|
||||
│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │
|
||||
|
@ -75,159 +78,112 @@ shape: (5, 14)
|
|||
│ 1 ┆ 13.16 ┆ 2.36 ┆ 2.67 ┆ ... ┆ 5.68 ┆ 1.03 ┆ 3.17 ┆ 1185 │
|
||||
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
||||
│ 1 ┆ 14.37 ┆ 1.95 ┆ 2.5 ┆ ... ┆ 7.8 ┆ 0.86 ┆ 3.45 ┆ 1480 │
|
||||
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
||||
│ 1 ┆ 13.24 ┆ 2.59 ┆ 2.87 ┆ ... ┆ 4.32 ┆ 1.04 ┆ 2.93 ┆ 735 │
|
||||
└──────┴─────────┴────────────┴──────┴─────┴───────────┴──────┴──────┴─────────┘
|
||||
```
|
||||
|
||||
Maybe the most interesting feature of `dr` is the ability to process csv and parquet files using SQL, while solutions like `xsv` and `csvkit` rely on a rich set of subcommands and options. If you already know SQL, there's no need to read any more documentation to select, filter, or group data. The only thing you need to remember is that the table will be called `this`. The following command outputs a csv of the wine with the highest concentration of alcohol in the popular wine dataset:
|
||||
Note that when `dr` loads csv data it also tries to guess the data type of each field.
|
||||
|
||||
### Parquet
|
||||
|
||||
`dr` is also useful to translate your csv files to parquet with a single command:
|
||||
|
||||
```bash
|
||||
$ cat wine.csv | dr wpq wine.pq
|
||||
```
|
||||
dr csv wine.csv -q "select * from this where Alcohol = max(Alcohol)" | dr print
|
||||
shape: (1, 14)
|
||||
|
||||
Or explore parquet files
|
||||
|
||||
```bash
|
||||
$ dr rpq wine.pq | head -n 5 | dr print
|
||||
shape: (4, 14)
|
||||
┌──────┬─────────┬────────────┬──────┬─────┬───────────┬──────┬──────┬─────────┐
|
||||
│ Wine ┆ Alcohol ┆ Malic.acid ┆ Ash ┆ ... ┆ Color.int ┆ Hue ┆ OD ┆ Proline │
|
||||
│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │
|
||||
│ i64 ┆ f64 ┆ f64 ┆ f64 ┆ ┆ f64 ┆ f64 ┆ f64 ┆ i64 │
|
||||
╞══════╪═════════╪════════════╪══════╪═════╪═══════════╪══════╪══════╪═════════╡
|
||||
│ 1 ┆ 14.83 ┆ 1.64 ┆ 2.17 ┆ ... ┆ 5.2 ┆ 1.08 ┆ 2.85 ┆ 1045 │
|
||||
│ 1 ┆ 14.23 ┆ 1.71 ┆ 2.43 ┆ ... ┆ 5.64 ┆ 1.04 ┆ 3.92 ┆ 1065 │
|
||||
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
||||
│ 1 ┆ 13.2 ┆ 1.78 ┆ 2.14 ┆ ... ┆ 4.38 ┆ 1.05 ┆ 3.4 ┆ 1050 │
|
||||
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
||||
│ 1 ┆ 13.16 ┆ 2.36 ┆ 2.67 ┆ ... ┆ 5.68 ┆ 1.03 ┆ 3.17 ┆ 1185 │
|
||||
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
|
||||
│ 1 ┆ 14.37 ┆ 1.95 ┆ 2.5 ┆ ... ┆ 7.8 ┆ 0.86 ┆ 3.45 ┆ 1480 │
|
||||
└──────┴─────────┴────────────┴──────┴─────┴───────────┴──────┴──────┴─────────┘
|
||||
```
|
||||
|
||||
If you don't use any option that formats the output of the results, `dr` outputs Arrow's IPC format, meaning that multiple `dr` calls can be efficiently chained with very low overhead. The following script loads one month of NY taxi data and executes two sql queries on the data.
|
||||
|
||||
```
|
||||
$ dr rpq data/yellow_tripdata_2014-01.parquet \
|
||||
-q "select count(1) as cnt, passenger_count from this group by passenger_count" \
|
||||
| dr sql "select * from this order by cnt desc" \
|
||||
| dr print
|
||||
┌─────────┬─────────────────┐
|
||||
│ cnt ┆ passenger_count │
|
||||
│ --- ┆ --- │
|
||||
│ u32 ┆ i64 │
|
||||
╞═════════╪═════════════════╡
|
||||
│ 9727321 ┆ 1 │
|
||||
├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ 1891588 ┆ 2 │
|
||||
├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ 789070 ┆ 5 │
|
||||
├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ 566248 ┆ 3 │
|
||||
├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ ... ┆ ... │
|
||||
├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ 19 ┆ 208 │
|
||||
├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ 16 ┆ 9 │
|
||||
├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ 7 ┆ 7 │
|
||||
├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ 5 ┆ 8 │
|
||||
└─────────┴─────────────────┘
|
||||
```
|
||||
|
||||
### Operate with SQL databases
|
||||
|
||||
How many times did you have to insert a csv file (sometimes larger than memory) to a database? Tens of times? Hundreds? You've probably used Pandas for that, since it can infer the table's datatypes. So a simple data operation becomes a python script with Pandas and a driver for PostgreSQL as dependencies.
|
||||
|
||||
Now dr can provide the table creation statement with a handful of columns:
|
||||
|
||||
```
|
||||
$ head wine.csv | dr schema -i -p -n wine
|
||||
CREATE TABLE IF NOT EXISTS "wine" ( );
|
||||
ALTER TABLE "wine" ADD COLUMN "Wine" integer;
|
||||
ALTER TABLE "wine" ADD COLUMN "Alcohol" real;
|
||||
ALTER TABLE "wine" ADD COLUMN "Malic.acid" real;
|
||||
ALTER TABLE "wine" ADD COLUMN "Ash" real;
|
||||
ALTER TABLE "wine" ADD COLUMN "Acl" real;
|
||||
ALTER TABLE "wine" ADD COLUMN "Mg" integer;
|
||||
ALTER TABLE "wine" ADD COLUMN "Phenols" real;
|
||||
ALTER TABLE "wine" ADD COLUMN "Flavanoids" real;
|
||||
ALTER TABLE "wine" ADD COLUMN "Nonflavanoid.phenols" real;
|
||||
ALTER TABLE "wine" ADD COLUMN "Proanth" real;
|
||||
ALTER TABLE "wine" ADD COLUMN "Color.int" real;
|
||||
ALTER TABLE "wine" ADD COLUMN "Hue" real;
|
||||
ALTER TABLE "wine" ADD COLUMN "OD" real;
|
||||
ALTER TABLE "wine" ADD COLUMN "Proline" integer;
|
||||
```
|
||||
|
||||
More about this in the Examples section
|
||||
|
||||
Since most databases can ingest and spit CSV files, some simple operations can be enhanced with dr, like storing the results of a query in a parquet file
|
||||
|
||||
```
|
||||
$ psql -c "copy (select * from wine) to stdout with (FORMAT 'csv', HEADER)" | dr csv -i -P wine.pq
|
||||
```
|
||||
|
||||
## Reference
|
||||
|
||||
Some commands that generate raw output in ipc format.
|
||||
|
||||
* Read a csv or parquet file and print the header: `dr {csv, rpq} [file] -a`
|
||||
* Read a csv or parquet file, execute a SQL statement, and output the results in stdout using Arrow's ipc format `dr {csv, rpq} [file] -q "statement"`
|
||||
* Read a csv or parquet file and print a summary of each column: `dr {csv, rpq} [file] -s "[query]"`
|
||||
* Read a csv or parquet file, execute a query, and output the results in stdout using the csv format `dr {csv, rpq} [file] -s "[query]" -t`
|
||||
* Read a csv and write a parquet file with the same contents: `dr csv [file.csv] -P [file.pq]`
|
||||
|
||||
Some commands that convert raw input in ipc format
|
||||
|
||||
* Read from stdin in ipc and pretty print the table: `dr print`
|
||||
* Read from stdin in csv and pretty print the table: `dr print -t`
|
||||
* Read from stdin in ipc and write the data in parquet: `dr wpq [file.pq]`
|
||||
|
||||
Some commands that read csv data from stdin
|
||||
|
||||
* Read csv from stdin and print the schema as it would be inserted in a postgresql database: `dr schema -i -p -n tablename`
|
||||
* Read csv from stdin and save as parquet, inferring types: `dr csv -i -P filename.pq`
|
||||
|
||||
## Examples
|
||||
|
||||
### Inserting CSV into postgres
|
||||
|
||||
Assume that you were given a large (several GiB) csv file with a weird (latin1) encoding, and you want to insert it into postgres. This dataset may be too large to store in memory in one go, so you'd like to stream it into the database. You need to
|
||||
|
||||
* Read the csv file
|
||||
* Infer the schema, and create a table
|
||||
* Change the encoding of the file to the same as the database
|
||||
|
||||
You can use `dr` to turn this into a two-step process, and pipe the encoding conversion in one go. The first step would be to infer the schema of the resulting table and create the table
|
||||
|
||||
```
|
||||
$ head large_csv_file.csv | iconv -f latin1 -t utf-8 | dr schema -i -p -n tablename | psql -U username -h hostname database
|
||||
```
|
||||
|
||||
The second step would be leveraging the `psql` command to write the contents of the file into the database
|
||||
|
||||
```
|
||||
$ cat large_csv_file.csv | iconv -f latin1 -t UTF-8 | psql -U username -h hostname -c "\copy tablename from stdin with (FORMAT 'csv', HEADER)" database
|
||||
```
|
||||
|
||||
The ingestion process is atomic, meaning that if `psql` fails to insert any record, no insertions will be made at all. If the insertion fails, probably because some column of type varchar can't fit the inferred type, you can change the type with:
|
||||
|
||||
```
|
||||
$ psql -U username -h hostname -c 'alter table tablename alter column "LongDescription" type varchar(1024);' database
|
||||
```
|
||||
|
||||
And try inserting again
|
||||
|
||||
## Performance
|
||||
|
||||
This command runs two dr processes. The first one makes an aggregation on a compressed parquet file of 144MB of size, and the second one just orders the result:
|
||||
`dr` is implemented in Rust with the goal of achieving the highest possible performance. Take for instance a simple read, groupby, and aggregate operation with ~30MB of data:
|
||||
|
||||
```
|
||||
$ dr rpq data/yellow_tripdata_2014-01.parquet \
|
||||
-q "select count(1) as cnt, passenger_count from this group by passenger_count" \
|
||||
| dr sql "select * from this order by cnt desc" \
|
||||
> /dev/null
|
||||
```bash
|
||||
$ time cat data/walmart_train.csv | dr sql "select Dept, avg("Weekly_Sales") from this group by Dept" | dr print
|
||||
shape: (81, 2)
|
||||
┌──────┬──────────────┐
|
||||
│ Dept ┆ Weekly_Sales │
|
||||
│ --- ┆ --- │
|
||||
│ i64 ┆ f64 │
|
||||
╞══════╪══════════════╡
|
||||
│ 30 ┆ 4118.197208 │
|
||||
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ 16 ┆ 14245.63827 │
|
||||
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ 56 ┆ 3833.706211 │
|
||||
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ 24 ┆ 6353.604562 │
|
||||
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ ... ┆ ... │
|
||||
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ 31 ┆ 2339.440287 │
|
||||
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ 59 ┆ 694.463564 │
|
||||
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ 27 ┆ 1583.437727 │
|
||||
├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
|
||||
│ 77 ┆ 328.9618 │
|
||||
└──────┴──────────────┘
|
||||
|
||||
real 0m0.089s
|
||||
user 0m0.116s
|
||||
sys 0m0.036s
|
||||
```
|
||||
|
||||
On a very very old machine (Intel(R) Core(TM) i5-6500T CPU @ 2.50GHz), this takes around half a second, which is roughly the time needed to read and decompress the parquet file. Polars' csv and parquet readers have some decent performance, so you can count on `dr` to be one of the fastest on the block.
|
||||
Let's compare that with the following Python script that leverages Pandas to read the data, and compute the aggregation:
|
||||
|
||||
## Caveats
|
||||
```python
|
||||
#!/usr/bin/env python3
|
||||
|
||||
1. `dr` uses Polars to build and transform dataframes in Rust, and the entire table may be loaded in memory. At the time when `dr` was created, streaming support didn't get along very well with SQL contexts.
|
||||
import sys
|
||||
import pandas as pd
|
||||
|
||||
df = pd.read_csv(sys.stdin)
|
||||
print(df.groupby("Dept", sort=False, as_index=False).Weekly_Sales.mean())
|
||||
```
|
||||
|
||||
```bash
|
||||
$ time cat data/walmart_train.csv | ./python/group.py
|
||||
Dept Weekly_Sales
|
||||
0 1 19213.485088
|
||||
1 2 43607.020113
|
||||
2 3 11793.698516
|
||||
3 4 25974.630238
|
||||
4 5 21365.583515
|
||||
.. ... ...
|
||||
76 99 415.487065
|
||||
77 39 11.123750
|
||||
78 50 2658.897010
|
||||
79 43 1.193333
|
||||
80 65 45441.706224
|
||||
|
||||
[81 rows x 2 columns]
|
||||
|
||||
real 0m0.717s
|
||||
user 0m0.627s
|
||||
sys 0m0.282s
|
||||
```
|
||||
|
||||
Note that there's roughly a 6x speedup. This is considering that this operation in particular is heavily optimized in Pandas and most of the run time is spent in parsing and reading from stdin.
|
||||
|
||||
2. `dr` uses Polars' SQLContext to execute the query which supports a small subset of the SQL language.
|
||||
|
||||
## Built standing on the shoulders of giants
|
||||
|
||||
|
|
295
notebooks/csv.ipynb
Normal file
295
notebooks/csv.ipynb
Normal file
|
@ -0,0 +1,295 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"id": "aa0bc87a-7fca-432d-9ad0-27855dfbc597",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
":dep csv = \"1.1\"\n",
|
||||
":dep serde = \"1.0\"\n",
|
||||
":dep polars = \"0.25\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"id": "ab0babdd-8482-4248-bf3b-33ab0aad9e07",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "Error",
|
||||
"evalue": "unresolved import `polars::preamble`",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[31m[E0432] Error:\u001b[0m unresolved import `polars::preamble`"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"use std::fs::File;\n",
|
||||
"use csv;\n",
|
||||
"use std::collections::HashMap\n",
|
||||
"use polars::preamble::*"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"id": "561048d0-6982-4a87-97b8-4c31179cddf4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ByteRecord([\"1\", \"14.23\", \"1.71\", \"2.43\", \"15.6\", \"127\", \"2.8\", \"3.06\", \".28\", \"2.29\", \"5.64\", \"1.04\", \"3.92\", \"1065\"])\n",
|
||||
"ByteRecord([\"1\", \"13.2\", \"1.78\", \"2.14\", \"11.2\", \"100\", \"2.65\", \"2.76\", \".26\", \"1.28\", \"4.38\", \"1.05\", \"3.4\", \"1050\"])\n",
|
||||
"ByteRecord([\"1\", \"13.16\", \"2.36\", \"2.67\", \"18.6\", \"101\", \"2.8\", \"3.24\", \".3\", \"2.81\", \"5.68\", \"1.03\", \"3.17\", \"1185\"])\n",
|
||||
"ByteRecord([\"1\", \"14.37\", \"1.95\", \"2.5\", \"16.8\", \"113\", \"3.85\", \"3.49\", \".24\", \"2.18\", \"7.8\", \".86\", \"3.45\", \"1480\"])\n",
|
||||
"ByteRecord([\"1\", \"13.24\", \"2.59\", \"2.87\", \"21\", \"118\", \"2.8\", \"2.69\", \".39\", \"1.82\", \"4.32\", \"1.04\", \"2.93\", \"735\"])\n",
|
||||
"ByteRecord([\"1\", \"14.2\", \"1.76\", \"2.45\", \"15.2\", \"112\", \"3.27\", \"3.39\", \".34\", \"1.97\", \"6.75\", \"1.05\", \"2.85\", \"1450\"])\n",
|
||||
"ByteRecord([\"1\", \"14.39\", \"1.87\", \"2.45\", \"14.6\", \"96\", \"2.5\", \"2.52\", \".3\", \"1.98\", \"5.25\", \"1.02\", \"3.58\", \"1290\"])\n",
|
||||
"ByteRecord([\"1\", \"14.06\", \"2.15\", \"2.61\", \"17.6\", \"121\", \"2.6\", \"2.51\", \".31\", \"1.25\", \"5.05\", \"1.06\", \"3.58\", \"1295\"])\n",
|
||||
"ByteRecord([\"1\", \"14.83\", \"1.64\", \"2.17\", \"14\", \"97\", \"2.8\", \"2.98\", \".29\", \"1.98\", \"5.2\", \"1.08\", \"2.85\", \"1045\"])\n",
|
||||
"ByteRecord([\"1\", \"13.86\", \"1.35\", \"2.27\", \"16\", \"98\", \"2.98\", \"3.15\", \".22\", \"1.85\", \"7.22\", \"1.01\", \"3.55\", \"1045\"])\n",
|
||||
"ByteRecord([\"1\", \"14.1\", \"2.16\", \"2.3\", \"18\", \"105\", \"2.95\", \"3.32\", \".22\", \"2.38\", \"5.75\", \"1.25\", \"3.17\", \"1510\"])\n",
|
||||
"ByteRecord([\"1\", \"14.12\", \"1.48\", \"2.32\", \"16.8\", \"95\", \"2.2\", \"2.43\", \".26\", \"1.57\", \"5\", \"1.17\", \"2.82\", \"1280\"])\n",
|
||||
"ByteRecord([\"1\", \"13.75\", \"1.73\", \"2.41\", \"16\", \"89\", \"2.6\", \"2.76\", \".29\", \"1.81\", \"5.6\", \"1.15\", \"2.9\", \"1320\"])\n",
|
||||
"ByteRecord([\"1\", \"14.75\", \"1.73\", \"2.39\", \"11.4\", \"91\", \"3.1\", \"3.69\", \".43\", \"2.81\", \"5.4\", \"1.25\", \"2.73\", \"1150\"])\n",
|
||||
"ByteRecord([\"1\", \"14.38\", \"1.87\", \"2.38\", \"12\", \"102\", \"3.3\", \"3.64\", \".29\", \"2.96\", \"7.5\", \"1.2\", \"3\", \"1547\"])\n",
|
||||
"ByteRecord([\"1\", \"13.63\", \"1.81\", \"2.7\", \"17.2\", \"112\", \"2.85\", \"2.91\", \".3\", \"1.46\", \"7.3\", \"1.28\", \"2.88\", \"1310\"])\n",
|
||||
"ByteRecord([\"1\", \"14.3\", \"1.92\", \"2.72\", \"20\", \"120\", \"2.8\", \"3.14\", \".33\", \"1.97\", \"6.2\", \"1.07\", \"2.65\", \"1280\"])\n",
|
||||
"ByteRecord([\"1\", \"13.83\", \"1.57\", \"2.62\", \"20\", \"115\", \"2.95\", \"3.4\", \".4\", \"1.72\", \"6.6\", \"1.13\", \"2.57\", \"1130\"])\n",
|
||||
"ByteRecord([\"1\", \"14.19\", \"1.59\", \"2.48\", \"16.5\", \"108\", \"3.3\", \"3.93\", \".32\", \"1.86\", \"8.7\", \"1.23\", \"2.82\", \"1680\"])\n",
|
||||
"ByteRecord([\"1\", \"13.64\", \"3.1\", \"2.56\", \"15.2\", \"116\", \"2.7\", \"3.03\", \".17\", \"1.66\", \"5.1\", \".96\", \"3.36\", \"845\"])\n",
|
||||
"ByteRecord([\"1\", \"14.06\", \"1.63\", \"2.28\", \"16\", \"126\", \"3\", \"3.17\", \".24\", \"2.1\", \"5.65\", \"1.09\", \"3.71\", \"780\"])\n",
|
||||
"ByteRecord([\"1\", \"12.93\", \"3.8\", \"2.65\", \"18.6\", \"102\", \"2.41\", \"2.41\", \".25\", \"1.98\", \"4.5\", \"1.03\", \"3.52\", \"770\"])\n",
|
||||
"ByteRecord([\"1\", \"13.71\", \"1.86\", \"2.36\", \"16.6\", \"101\", \"2.61\", \"2.88\", \".27\", \"1.69\", \"3.8\", \"1.11\", \"4\", \"1035\"])\n",
|
||||
"ByteRecord([\"1\", \"12.85\", \"1.6\", \"2.52\", \"17.8\", \"95\", \"2.48\", \"2.37\", \".26\", \"1.46\", \"3.93\", \"1.09\", \"3.63\", \"1015\"])\n",
|
||||
"ByteRecord([\"1\", \"13.5\", \"1.81\", \"2.61\", \"20\", \"96\", \"2.53\", \"2.61\", \".28\", \"1.66\", \"3.52\", \"1.12\", \"3.82\", \"845\"])\n",
|
||||
"ByteRecord([\"1\", \"13.05\", \"2.05\", \"3.22\", \"25\", \"124\", \"2.63\", \"2.68\", \".47\", \"1.92\", \"3.58\", \"1.13\", \"3.2\", \"830\"])\n",
|
||||
"ByteRecord([\"1\", \"13.39\", \"1.77\", \"2.62\", \"16.1\", \"93\", \"2.85\", \"2.94\", \".34\", \"1.45\", \"4.8\", \".92\", \"3.22\", \"1195\"])\n",
|
||||
"ByteRecord([\"1\", \"13.3\", \"1.72\", \"2.14\", \"17\", \"94\", \"2.4\", \"2.19\", \".27\", \"1.35\", \"3.95\", \"1.02\", \"2.77\", \"1285\"])\n",
|
||||
"ByteRecord([\"1\", \"13.87\", \"1.9\", \"2.8\", \"19.4\", \"107\", \"2.95\", \"2.97\", \".37\", \"1.76\", \"4.5\", \"1.25\", \"3.4\", \"915\"])\n",
|
||||
"ByteRecord([\"1\", \"14.02\", \"1.68\", \"2.21\", \"16\", \"96\", \"2.65\", \"2.33\", \".26\", \"1.98\", \"4.7\", \"1.04\", \"3.59\", \"1035\"])\n",
|
||||
"ByteRecord([\"1\", \"13.73\", \"1.5\", \"2.7\", \"22.5\", \"101\", \"3\", \"3.25\", \".29\", \"2.38\", \"5.7\", \"1.19\", \"2.71\", \"1285\"])\n",
|
||||
"ByteRecord([\"1\", \"13.58\", \"1.66\", \"2.36\", \"19.1\", \"106\", \"2.86\", \"3.19\", \".22\", \"1.95\", \"6.9\", \"1.09\", \"2.88\", \"1515\"])\n",
|
||||
"ByteRecord([\"1\", \"13.68\", \"1.83\", \"2.36\", \"17.2\", \"104\", \"2.42\", \"2.69\", \".42\", \"1.97\", \"3.84\", \"1.23\", \"2.87\", \"990\"])\n",
|
||||
"ByteRecord([\"1\", \"13.76\", \"1.53\", \"2.7\", \"19.5\", \"132\", \"2.95\", \"2.74\", \".5\", \"1.35\", \"5.4\", \"1.25\", \"3\", \"1235\"])\n",
|
||||
"ByteRecord([\"1\", \"13.51\", \"1.8\", \"2.65\", \"19\", \"110\", \"2.35\", \"2.53\", \".29\", \"1.54\", \"4.2\", \"1.1\", \"2.87\", \"1095\"])\n",
|
||||
"ByteRecord([\"1\", \"13.48\", \"1.81\", \"2.41\", \"20.5\", \"100\", \"2.7\", \"2.98\", \".26\", \"1.86\", \"5.1\", \"1.04\", \"3.47\", \"920\"])\n",
|
||||
"ByteRecord([\"1\", \"13.28\", \"1.64\", \"2.84\", \"15.5\", \"110\", \"2.6\", \"2.68\", \".34\", \"1.36\", \"4.6\", \"1.09\", \"2.78\", \"880\"])\n",
|
||||
"ByteRecord([\"1\", \"13.05\", \"1.65\", \"2.55\", \"18\", \"98\", \"2.45\", \"2.43\", \".29\", \"1.44\", \"4.25\", \"1.12\", \"2.51\", \"1105\"])\n",
|
||||
"ByteRecord([\"1\", \"13.07\", \"1.5\", \"2.1\", \"15.5\", \"98\", \"2.4\", \"2.64\", \".28\", \"1.37\", \"3.7\", \"1.18\", \"2.69\", \"1020\"])\n",
|
||||
"ByteRecord([\"1\", \"14.22\", \"3.99\", \"2.51\", \"13.2\", \"128\", \"3\", \"3.04\", \".2\", \"2.08\", \"5.1\", \".89\", \"3.53\", \"760\"])\n",
|
||||
"ByteRecord([\"1\", \"13.56\", \"1.71\", \"2.31\", \"16.2\", \"117\", \"3.15\", \"3.29\", \".34\", \"2.34\", \"6.13\", \".95\", \"3.38\", \"795\"])\n",
|
||||
"ByteRecord([\"1\", \"13.41\", \"3.84\", \"2.12\", \"18.8\", \"90\", \"2.45\", \"2.68\", \".27\", \"1.48\", \"4.28\", \".91\", \"3\", \"1035\"])\n",
|
||||
"ByteRecord([\"1\", \"13.88\", \"1.89\", \"2.59\", \"15\", \"101\", \"3.25\", \"3.56\", \".17\", \"1.7\", \"5.43\", \".88\", \"3.56\", \"1095\"])\n",
|
||||
"ByteRecord([\"1\", \"13.24\", \"3.98\", \"2.29\", \"17.5\", \"103\", \"2.64\", \"2.63\", \".32\", \"1.66\", \"4.36\", \".82\", \"3\", \"680\"])\n",
|
||||
"ByteRecord([\"1\", \"13.05\", \"1.77\", \"2.1\", \"17\", \"107\", \"3\", \"3\", \".28\", \"2.03\", \"5.04\", \".88\", \"3.35\", \"885\"])\n",
|
||||
"ByteRecord([\"1\", \"14.21\", \"4.04\", \"2.44\", \"18.9\", \"111\", \"2.85\", \"2.65\", \".3\", \"1.25\", \"5.24\", \".87\", \"3.33\", \"1080\"])\n",
|
||||
"ByteRecord([\"1\", \"14.38\", \"3.59\", \"2.28\", \"16\", \"102\", \"3.25\", \"3.17\", \".27\", \"2.19\", \"4.9\", \"1.04\", \"3.44\", \"1065\"])\n",
|
||||
"ByteRecord([\"1\", \"13.9\", \"1.68\", \"2.12\", \"16\", \"101\", \"3.1\", \"3.39\", \".21\", \"2.14\", \"6.1\", \".91\", \"3.33\", \"985\"])\n",
|
||||
"ByteRecord([\"1\", \"14.1\", \"2.02\", \"2.4\", \"18.8\", \"103\", \"2.75\", \"2.92\", \".32\", \"2.38\", \"6.2\", \"1.07\", \"2.75\", \"1060\"])\n",
|
||||
"ByteRecord([\"1\", \"13.94\", \"1.73\", \"2.27\", \"17.4\", \"108\", \"2.88\", \"3.54\", \".32\", \"2.08\", \"8.90\", \"1.12\", \"3.1\", \"1260\"])\n",
|
||||
"ByteRecord([\"1\", \"13.05\", \"1.73\", \"2.04\", \"12.4\", \"92\", \"2.72\", \"3.27\", \".17\", \"2.91\", \"7.2\", \"1.12\", \"2.91\", \"1150\"])\n",
|
||||
"ByteRecord([\"1\", \"13.83\", \"1.65\", \"2.6\", \"17.2\", \"94\", \"2.45\", \"2.99\", \".22\", \"2.29\", \"5.6\", \"1.24\", \"3.37\", \"1265\"])\n",
|
||||
"ByteRecord([\"1\", \"13.82\", \"1.75\", \"2.42\", \"14\", \"111\", \"3.88\", \"3.74\", \".32\", \"1.87\", \"7.05\", \"1.01\", \"3.26\", \"1190\"])\n",
|
||||
"ByteRecord([\"1\", \"13.77\", \"1.9\", \"2.68\", \"17.1\", \"115\", \"3\", \"2.79\", \".39\", \"1.68\", \"6.3\", \"1.13\", \"2.93\", \"1375\"])\n",
|
||||
"ByteRecord([\"1\", \"13.74\", \"1.67\", \"2.25\", \"16.4\", \"118\", \"2.6\", \"2.9\", \".21\", \"1.62\", \"5.85\", \".92\", \"3.2\", \"1060\"])\n",
|
||||
"ByteRecord([\"1\", \"13.56\", \"1.73\", \"2.46\", \"20.5\", \"116\", \"2.96\", \"2.78\", \".2\", \"2.45\", \"6.25\", \".98\", \"3.03\", \"1120\"])\n",
|
||||
"ByteRecord([\"1\", \"14.22\", \"1.7\", \"2.3\", \"16.3\", \"118\", \"3.2\", \"3\", \".26\", \"2.03\", \"6.38\", \".94\", \"3.31\", \"970\"])\n",
|
||||
"ByteRecord([\"1\", \"13.29\", \"1.97\", \"2.68\", \"16.8\", \"102\", \"3\", \"3.23\", \".31\", \"1.66\", \"6\", \"1.07\", \"2.84\", \"1270\"])\n",
|
||||
"ByteRecord([\"1\", \"13.72\", \"1.43\", \"2.5\", \"16.7\", \"108\", \"3.4\", \"3.67\", \".19\", \"2.04\", \"6.8\", \".89\", \"2.87\", \"1285\"])\n",
|
||||
"ByteRecord([\"2\", \"12.37\", \".94\", \"1.36\", \"10.6\", \"88\", \"1.98\", \".57\", \".28\", \".42\", \"1.95\", \"1.05\", \"1.82\", \"520\"])\n",
|
||||
"ByteRecord([\"2\", \"12.33\", \"1.1\", \"2.28\", \"16\", \"101\", \"2.05\", \"1.09\", \".63\", \".41\", \"3.27\", \"1.25\", \"1.67\", \"680\"])\n",
|
||||
"ByteRecord([\"2\", \"12.64\", \"1.36\", \"2.02\", \"16.8\", \"100\", \"2.02\", \"1.41\", \".53\", \".62\", \"5.75\", \".98\", \"1.59\", \"450\"])\n",
|
||||
"ByteRecord([\"2\", \"13.67\", \"1.25\", \"1.92\", \"18\", \"94\", \"2.1\", \"1.79\", \".32\", \".73\", \"3.8\", \"1.23\", \"2.46\", \"630\"])\n",
|
||||
"ByteRecord([\"2\", \"12.37\", \"1.13\", \"2.16\", \"19\", \"87\", \"3.5\", \"3.1\", \".19\", \"1.87\", \"4.45\", \"1.22\", \"2.87\", \"420\"])\n",
|
||||
"ByteRecord([\"2\", \"12.17\", \"1.45\", \"2.53\", \"19\", \"104\", \"1.89\", \"1.75\", \".45\", \"1.03\", \"2.95\", \"1.45\", \"2.23\", \"355\"])\n",
|
||||
"ByteRecord([\"2\", \"12.37\", \"1.21\", \"2.56\", \"18.1\", \"98\", \"2.42\", \"2.65\", \".37\", \"2.08\", \"4.6\", \"1.19\", \"2.3\", \"678\"])\n",
|
||||
"ByteRecord([\"2\", \"13.11\", \"1.01\", \"1.7\", \"15\", \"78\", \"2.98\", \"3.18\", \".26\", \"2.28\", \"5.3\", \"1.12\", \"3.18\", \"502\"])\n",
|
||||
"ByteRecord([\"2\", \"12.37\", \"1.17\", \"1.92\", \"19.6\", \"78\", \"2.11\", \"2\", \".27\", \"1.04\", \"4.68\", \"1.12\", \"3.48\", \"510\"])\n",
|
||||
"ByteRecord([\"2\", \"13.34\", \".94\", \"2.36\", \"17\", \"110\", \"2.53\", \"1.3\", \".55\", \".42\", \"3.17\", \"1.02\", \"1.93\", \"750\"])\n",
|
||||
"ByteRecord([\"2\", \"12.21\", \"1.19\", \"1.75\", \"16.8\", \"151\", \"1.85\", \"1.28\", \".14\", \"2.5\", \"2.85\", \"1.28\", \"3.07\", \"718\"])\n",
|
||||
"ByteRecord([\"2\", \"12.29\", \"1.61\", \"2.21\", \"20.4\", \"103\", \"1.1\", \"1.02\", \".37\", \"1.46\", \"3.05\", \".906\", \"1.82\", \"870\"])\n",
|
||||
"ByteRecord([\"2\", \"13.86\", \"1.51\", \"2.67\", \"25\", \"86\", \"2.95\", \"2.86\", \".21\", \"1.87\", \"3.38\", \"1.36\", \"3.16\", \"410\"])\n",
|
||||
"ByteRecord([\"2\", \"13.49\", \"1.66\", \"2.24\", \"24\", \"87\", \"1.88\", \"1.84\", \".27\", \"1.03\", \"3.74\", \".98\", \"2.78\", \"472\"])\n",
|
||||
"ByteRecord([\"2\", \"12.99\", \"1.67\", \"2.6\", \"30\", \"139\", \"3.3\", \"2.89\", \".21\", \"1.96\", \"3.35\", \"1.31\", \"3.5\", \"985\"])\n",
|
||||
"ByteRecord([\"2\", \"11.96\", \"1.09\", \"2.3\", \"21\", \"101\", \"3.38\", \"2.14\", \".13\", \"1.65\", \"3.21\", \".99\", \"3.13\", \"886\"])\n",
|
||||
"ByteRecord([\"2\", \"11.66\", \"1.88\", \"1.92\", \"16\", \"97\", \"1.61\", \"1.57\", \".34\", \"1.15\", \"3.8\", \"1.23\", \"2.14\", \"428\"])\n",
|
||||
"ByteRecord([\"2\", \"13.03\", \".9\", \"1.71\", \"16\", \"86\", \"1.95\", \"2.03\", \".24\", \"1.46\", \"4.6\", \"1.19\", \"2.48\", \"392\"])\n",
|
||||
"ByteRecord([\"2\", \"11.84\", \"2.89\", \"2.23\", \"18\", \"112\", \"1.72\", \"1.32\", \".43\", \".95\", \"2.65\", \".96\", \"2.52\", \"500\"])\n",
|
||||
"ByteRecord([\"2\", \"12.33\", \".99\", \"1.95\", \"14.8\", \"136\", \"1.9\", \"1.85\", \".35\", \"2.76\", \"3.4\", \"1.06\", \"2.31\", \"750\"])\n",
|
||||
"ByteRecord([\"2\", \"12.7\", \"3.87\", \"2.4\", \"23\", \"101\", \"2.83\", \"2.55\", \".43\", \"1.95\", \"2.57\", \"1.19\", \"3.13\", \"463\"])\n",
|
||||
"ByteRecord([\"2\", \"12\", \".92\", \"2\", \"19\", \"86\", \"2.42\", \"2.26\", \".3\", \"1.43\", \"2.5\", \"1.38\", \"3.12\", \"278\"])\n",
|
||||
"ByteRecord([\"2\", \"12.72\", \"1.81\", \"2.2\", \"18.8\", \"86\", \"2.2\", \"2.53\", \".26\", \"1.77\", \"3.9\", \"1.16\", \"3.14\", \"714\"])\n",
|
||||
"ByteRecord([\"2\", \"12.08\", \"1.13\", \"2.51\", \"24\", \"78\", \"2\", \"1.58\", \".4\", \"1.4\", \"2.2\", \"1.31\", \"2.72\", \"630\"])\n",
|
||||
"ByteRecord([\"2\", \"13.05\", \"3.86\", \"2.32\", \"22.5\", \"85\", \"1.65\", \"1.59\", \".61\", \"1.62\", \"4.8\", \".84\", \"2.01\", \"515\"])\n",
|
||||
"ByteRecord([\"2\", \"11.84\", \".89\", \"2.58\", \"18\", \"94\", \"2.2\", \"2.21\", \".22\", \"2.35\", \"3.05\", \".79\", \"3.08\", \"520\"])\n",
|
||||
"ByteRecord([\"2\", \"12.67\", \".98\", \"2.24\", \"18\", \"99\", \"2.2\", \"1.94\", \".3\", \"1.46\", \"2.62\", \"1.23\", \"3.16\", \"450\"])\n",
|
||||
"ByteRecord([\"2\", \"12.16\", \"1.61\", \"2.31\", \"22.8\", \"90\", \"1.78\", \"1.69\", \".43\", \"1.56\", \"2.45\", \"1.33\", \"2.26\", \"495\"])\n",
|
||||
"ByteRecord([\"2\", \"11.65\", \"1.67\", \"2.62\", \"26\", \"88\", \"1.92\", \"1.61\", \".4\", \"1.34\", \"2.6\", \"1.36\", \"3.21\", \"562\"])\n",
|
||||
"ByteRecord([\"2\", \"11.64\", \"2.06\", \"2.46\", \"21.6\", \"84\", \"1.95\", \"1.69\", \".48\", \"1.35\", \"2.8\", \"1\", \"2.75\", \"680\"])\n",
|
||||
"ByteRecord([\"2\", \"12.08\", \"1.33\", \"2.3\", \"23.6\", \"70\", \"2.2\", \"1.59\", \".42\", \"1.38\", \"1.74\", \"1.07\", \"3.21\", \"625\"])\n",
|
||||
"ByteRecord([\"2\", \"12.08\", \"1.83\", \"2.32\", \"18.5\", \"81\", \"1.6\", \"1.5\", \".52\", \"1.64\", \"2.4\", \"1.08\", \"2.27\", \"480\"])\n",
|
||||
"ByteRecord([\"2\", \"12\", \"1.51\", \"2.42\", \"22\", \"86\", \"1.45\", \"1.25\", \".5\", \"1.63\", \"3.6\", \"1.05\", \"2.65\", \"450\"])\n",
|
||||
"ByteRecord([\"2\", \"12.69\", \"1.53\", \"2.26\", \"20.7\", \"80\", \"1.38\", \"1.46\", \".58\", \"1.62\", \"3.05\", \".96\", \"2.06\", \"495\"])\n",
|
||||
"ByteRecord([\"2\", \"12.29\", \"2.83\", \"2.22\", \"18\", \"88\", \"2.45\", \"2.25\", \".25\", \"1.99\", \"2.15\", \"1.15\", \"3.3\", \"290\"])\n",
|
||||
"ByteRecord([\"2\", \"11.62\", \"1.99\", \"2.28\", \"18\", \"98\", \"3.02\", \"2.26\", \".17\", \"1.35\", \"3.25\", \"1.16\", \"2.96\", \"345\"])\n",
|
||||
"ByteRecord([\"2\", \"12.47\", \"1.52\", \"2.2\", \"19\", \"162\", \"2.5\", \"2.27\", \".32\", \"3.28\", \"2.6\", \"1.16\", \"2.63\", \"937\"])\n",
|
||||
"ByteRecord([\"2\", \"11.81\", \"2.12\", \"2.74\", \"21.5\", \"134\", \"1.6\", \".99\", \".14\", \"1.56\", \"2.5\", \".95\", \"2.26\", \"625\"])\n",
|
||||
"ByteRecord([\"2\", \"12.29\", \"1.41\", \"1.98\", \"16\", \"85\", \"2.55\", \"2.5\", \".29\", \"1.77\", \"2.9\", \"1.23\", \"2.74\", \"428\"])\n",
|
||||
"ByteRecord([\"2\", \"12.37\", \"1.07\", \"2.1\", \"18.5\", \"88\", \"3.52\", \"3.75\", \".24\", \"1.95\", \"4.5\", \"1.04\", \"2.77\", \"660\"])\n",
|
||||
"ByteRecord([\"2\", \"12.29\", \"3.17\", \"2.21\", \"18\", \"88\", \"2.85\", \"2.99\", \".45\", \"2.81\", \"2.3\", \"1.42\", \"2.83\", \"406\"])\n",
|
||||
"ByteRecord([\"2\", \"12.08\", \"2.08\", \"1.7\", \"17.5\", \"97\", \"2.23\", \"2.17\", \".26\", \"1.4\", \"3.3\", \"1.27\", \"2.96\", \"710\"])\n",
|
||||
"ByteRecord([\"2\", \"12.6\", \"1.34\", \"1.9\", \"18.5\", \"88\", \"1.45\", \"1.36\", \".29\", \"1.35\", \"2.45\", \"1.04\", \"2.77\", \"562\"])\n",
|
||||
"ByteRecord([\"2\", \"12.34\", \"2.45\", \"2.46\", \"21\", \"98\", \"2.56\", \"2.11\", \".34\", \"1.31\", \"2.8\", \".8\", \"3.38\", \"438\"])\n",
|
||||
"ByteRecord([\"2\", \"11.82\", \"1.72\", \"1.88\", \"19.5\", \"86\", \"2.5\", \"1.64\", \".37\", \"1.42\", \"2.06\", \".94\", \"2.44\", \"415\"])\n",
|
||||
"ByteRecord([\"2\", \"12.51\", \"1.73\", \"1.98\", \"20.5\", \"85\", \"2.2\", \"1.92\", \".32\", \"1.48\", \"2.94\", \"1.04\", \"3.57\", \"672\"])\n",
|
||||
"ByteRecord([\"2\", \"12.42\", \"2.55\", \"2.27\", \"22\", \"90\", \"1.68\", \"1.84\", \".66\", \"1.42\", \"2.7\", \".86\", \"3.3\", \"315\"])\n",
|
||||
"ByteRecord([\"2\", \"12.25\", \"1.73\", \"2.12\", \"19\", \"80\", \"1.65\", \"2.03\", \".37\", \"1.63\", \"3.4\", \"1\", \"3.17\", \"510\"])\n",
|
||||
"ByteRecord([\"2\", \"12.72\", \"1.75\", \"2.28\", \"22.5\", \"84\", \"1.38\", \"1.76\", \".48\", \"1.63\", \"3.3\", \".88\", \"2.42\", \"488\"])\n",
|
||||
"ByteRecord([\"2\", \"12.22\", \"1.29\", \"1.94\", \"19\", \"92\", \"2.36\", \"2.04\", \".39\", \"2.08\", \"2.7\", \".86\", \"3.02\", \"312\"])\n",
|
||||
"ByteRecord([\"2\", \"11.61\", \"1.35\", \"2.7\", \"20\", \"94\", \"2.74\", \"2.92\", \".29\", \"2.49\", \"2.65\", \".96\", \"3.26\", \"680\"])\n",
|
||||
"ByteRecord([\"2\", \"11.46\", \"3.74\", \"1.82\", \"19.5\", \"107\", \"3.18\", \"2.58\", \".24\", \"3.58\", \"2.9\", \".75\", \"2.81\", \"562\"])\n",
|
||||
"ByteRecord([\"2\", \"12.52\", \"2.43\", \"2.17\", \"21\", \"88\", \"2.55\", \"2.27\", \".26\", \"1.22\", \"2\", \".9\", \"2.78\", \"325\"])\n",
|
||||
"ByteRecord([\"2\", \"11.76\", \"2.68\", \"2.92\", \"20\", \"103\", \"1.75\", \"2.03\", \".6\", \"1.05\", \"3.8\", \"1.23\", \"2.5\", \"607\"])\n",
|
||||
"ByteRecord([\"2\", \"11.41\", \".74\", \"2.5\", \"21\", \"88\", \"2.48\", \"2.01\", \".42\", \"1.44\", \"3.08\", \"1.1\", \"2.31\", \"434\"])\n",
|
||||
"ByteRecord([\"2\", \"12.08\", \"1.39\", \"2.5\", \"22.5\", \"84\", \"2.56\", \"2.29\", \".43\", \"1.04\", \"2.9\", \".93\", \"3.19\", \"385\"])\n",
|
||||
"ByteRecord([\"2\", \"11.03\", \"1.51\", \"2.2\", \"21.5\", \"85\", \"2.46\", \"2.17\", \".52\", \"2.01\", \"1.9\", \"1.71\", \"2.87\", \"407\"])\n",
|
||||
"ByteRecord([\"2\", \"11.82\", \"1.47\", \"1.99\", \"20.8\", \"86\", \"1.98\", \"1.6\", \".3\", \"1.53\", \"1.95\", \".95\", \"3.33\", \"495\"])\n",
|
||||
"ByteRecord([\"2\", \"12.42\", \"1.61\", \"2.19\", \"22.5\", \"108\", \"2\", \"2.09\", \".34\", \"1.61\", \"2.06\", \"1.06\", \"2.96\", \"345\"])\n",
|
||||
"ByteRecord([\"2\", \"12.77\", \"3.43\", \"1.98\", \"16\", \"80\", \"1.63\", \"1.25\", \".43\", \".83\", \"3.4\", \".7\", \"2.12\", \"372\"])\n",
|
||||
"ByteRecord([\"2\", \"12\", \"3.43\", \"2\", \"19\", \"87\", \"2\", \"1.64\", \".37\", \"1.87\", \"1.28\", \".93\", \"3.05\", \"564\"])\n",
|
||||
"ByteRecord([\"2\", \"11.45\", \"2.4\", \"2.42\", \"20\", \"96\", \"2.9\", \"2.79\", \".32\", \"1.83\", \"3.25\", \".8\", \"3.39\", \"625\"])\n",
|
||||
"ByteRecord([\"2\", \"11.56\", \"2.05\", \"3.23\", \"28.5\", \"119\", \"3.18\", \"5.08\", \".47\", \"1.87\", \"6\", \".93\", \"3.69\", \"465\"])\n",
|
||||
"ByteRecord([\"2\", \"12.42\", \"4.43\", \"2.73\", \"26.5\", \"102\", \"2.2\", \"2.13\", \".43\", \"1.71\", \"2.08\", \".92\", \"3.12\", \"365\"])\n",
|
||||
"ByteRecord([\"2\", \"13.05\", \"5.8\", \"2.13\", \"21.5\", \"86\", \"2.62\", \"2.65\", \".3\", \"2.01\", \"2.6\", \".73\", \"3.1\", \"380\"])\n",
|
||||
"ByteRecord([\"2\", \"11.87\", \"4.31\", \"2.39\", \"21\", \"82\", \"2.86\", \"3.03\", \".21\", \"2.91\", \"2.8\", \".75\", \"3.64\", \"380\"])\n",
|
||||
"ByteRecord([\"2\", \"12.07\", \"2.16\", \"2.17\", \"21\", \"85\", \"2.6\", \"2.65\", \".37\", \"1.35\", \"2.76\", \".86\", \"3.28\", \"378\"])\n",
|
||||
"ByteRecord([\"2\", \"12.43\", \"1.53\", \"2.29\", \"21.5\", \"86\", \"2.74\", \"3.15\", \".39\", \"1.77\", \"3.94\", \".69\", \"2.84\", \"352\"])\n",
|
||||
"ByteRecord([\"2\", \"11.79\", \"2.13\", \"2.78\", \"28.5\", \"92\", \"2.13\", \"2.24\", \".58\", \"1.76\", \"3\", \".97\", \"2.44\", \"466\"])\n",
|
||||
"ByteRecord([\"2\", \"12.37\", \"1.63\", \"2.3\", \"24.5\", \"88\", \"2.22\", \"2.45\", \".4\", \"1.9\", \"2.12\", \".89\", \"2.78\", \"342\"])\n",
|
||||
"ByteRecord([\"2\", \"12.04\", \"4.3\", \"2.38\", \"22\", \"80\", \"2.1\", \"1.75\", \".42\", \"1.35\", \"2.6\", \".79\", \"2.57\", \"580\"])\n",
|
||||
"ByteRecord([\"3\", \"12.86\", \"1.35\", \"2.32\", \"18\", \"122\", \"1.51\", \"1.25\", \".21\", \".94\", \"4.1\", \".76\", \"1.29\", \"630\"])\n",
|
||||
"ByteRecord([\"3\", \"12.88\", \"2.99\", \"2.4\", \"20\", \"104\", \"1.3\", \"1.22\", \".24\", \".83\", \"5.4\", \".74\", \"1.42\", \"530\"])\n",
|
||||
"ByteRecord([\"3\", \"12.81\", \"2.31\", \"2.4\", \"24\", \"98\", \"1.15\", \"1.09\", \".27\", \".83\", \"5.7\", \".66\", \"1.36\", \"560\"])\n",
|
||||
"ByteRecord([\"3\", \"12.7\", \"3.55\", \"2.36\", \"21.5\", \"106\", \"1.7\", \"1.2\", \".17\", \".84\", \"5\", \".78\", \"1.29\", \"600\"])\n",
|
||||
"ByteRecord([\"3\", \"12.51\", \"1.24\", \"2.25\", \"17.5\", \"85\", \"2\", \".58\", \".6\", \"1.25\", \"5.45\", \".75\", \"1.51\", \"650\"])\n",
|
||||
"ByteRecord([\"3\", \"12.6\", \"2.46\", \"2.2\", \"18.5\", \"94\", \"1.62\", \".66\", \".63\", \".94\", \"7.1\", \".73\", \"1.58\", \"695\"])\n",
|
||||
"ByteRecord([\"3\", \"12.25\", \"4.72\", \"2.54\", \"21\", \"89\", \"1.38\", \".47\", \".53\", \".8\", \"3.85\", \".75\", \"1.27\", \"720\"])\n",
|
||||
"ByteRecord([\"3\", \"12.53\", \"5.51\", \"2.64\", \"25\", \"96\", \"1.79\", \".6\", \".63\", \"1.1\", \"5\", \".82\", \"1.69\", \"515\"])\n",
|
||||
"ByteRecord([\"3\", \"13.49\", \"3.59\", \"2.19\", \"19.5\", \"88\", \"1.62\", \".48\", \".58\", \".88\", \"5.7\", \".81\", \"1.82\", \"580\"])\n",
|
||||
"ByteRecord([\"3\", \"12.84\", \"2.96\", \"2.61\", \"24\", \"101\", \"2.32\", \".6\", \".53\", \".81\", \"4.92\", \".89\", \"2.15\", \"590\"])\n",
|
||||
"ByteRecord([\"3\", \"12.93\", \"2.81\", \"2.7\", \"21\", \"96\", \"1.54\", \".5\", \".53\", \".75\", \"4.6\", \".77\", \"2.31\", \"600\"])\n",
|
||||
"ByteRecord([\"3\", \"13.36\", \"2.56\", \"2.35\", \"20\", \"89\", \"1.4\", \".5\", \".37\", \".64\", \"5.6\", \".7\", \"2.47\", \"780\"])\n",
|
||||
"ByteRecord([\"3\", \"13.52\", \"3.17\", \"2.72\", \"23.5\", \"97\", \"1.55\", \".52\", \".5\", \".55\", \"4.35\", \".89\", \"2.06\", \"520\"])\n",
|
||||
"ByteRecord([\"3\", \"13.62\", \"4.95\", \"2.35\", \"20\", \"92\", \"2\", \".8\", \".47\", \"1.02\", \"4.4\", \".91\", \"2.05\", \"550\"])\n",
|
||||
"ByteRecord([\"3\", \"12.25\", \"3.88\", \"2.2\", \"18.5\", \"112\", \"1.38\", \".78\", \".29\", \"1.14\", \"8.21\", \".65\", \"2\", \"855\"])\n",
|
||||
"ByteRecord([\"3\", \"13.16\", \"3.57\", \"2.15\", \"21\", \"102\", \"1.5\", \".55\", \".43\", \"1.3\", \"4\", \".6\", \"1.68\", \"830\"])\n",
|
||||
"ByteRecord([\"3\", \"13.88\", \"5.04\", \"2.23\", \"20\", \"80\", \".98\", \".34\", \".4\", \".68\", \"4.9\", \".58\", \"1.33\", \"415\"])\n",
|
||||
"ByteRecord([\"3\", \"12.87\", \"4.61\", \"2.48\", \"21.5\", \"86\", \"1.7\", \".65\", \".47\", \".86\", \"7.65\", \".54\", \"1.86\", \"625\"])\n",
|
||||
"ByteRecord([\"3\", \"13.32\", \"3.24\", \"2.38\", \"21.5\", \"92\", \"1.93\", \".76\", \".45\", \"1.25\", \"8.42\", \".55\", \"1.62\", \"650\"])\n",
|
||||
"ByteRecord([\"3\", \"13.08\", \"3.9\", \"2.36\", \"21.5\", \"113\", \"1.41\", \"1.39\", \".34\", \"1.14\", \"9.40\", \".57\", \"1.33\", \"550\"])\n",
|
||||
"ByteRecord([\"3\", \"13.5\", \"3.12\", \"2.62\", \"24\", \"123\", \"1.4\", \"1.57\", \".22\", \"1.25\", \"8.60\", \".59\", \"1.3\", \"500\"])\n",
|
||||
"ByteRecord([\"3\", \"12.79\", \"2.67\", \"2.48\", \"22\", \"112\", \"1.48\", \"1.36\", \".24\", \"1.26\", \"10.8\", \".48\", \"1.47\", \"480\"])\n",
|
||||
"ByteRecord([\"3\", \"13.11\", \"1.9\", \"2.75\", \"25.5\", \"116\", \"2.2\", \"1.28\", \".26\", \"1.56\", \"7.1\", \".61\", \"1.33\", \"425\"])\n",
|
||||
"ByteRecord([\"3\", \"13.23\", \"3.3\", \"2.28\", \"18.5\", \"98\", \"1.8\", \".83\", \".61\", \"1.87\", \"10.52\", \".56\", \"1.51\", \"675\"])\n",
|
||||
"ByteRecord([\"3\", \"12.58\", \"1.29\", \"2.1\", \"20\", \"103\", \"1.48\", \".58\", \".53\", \"1.4\", \"7.6\", \".58\", \"1.55\", \"640\"])\n",
|
||||
"ByteRecord([\"3\", \"13.17\", \"5.19\", \"2.32\", \"22\", \"93\", \"1.74\", \".63\", \".61\", \"1.55\", \"7.9\", \".6\", \"1.48\", \"725\"])\n",
|
||||
"ByteRecord([\"3\", \"13.84\", \"4.12\", \"2.38\", \"19.5\", \"89\", \"1.8\", \".83\", \".48\", \"1.56\", \"9.01\", \".57\", \"1.64\", \"480\"])\n",
|
||||
"ByteRecord([\"3\", \"12.45\", \"3.03\", \"2.64\", \"27\", \"97\", \"1.9\", \".58\", \".63\", \"1.14\", \"7.5\", \".67\", \"1.73\", \"880\"])\n",
|
||||
"ByteRecord([\"3\", \"14.34\", \"1.68\", \"2.7\", \"25\", \"98\", \"2.8\", \"1.31\", \".53\", \"2.7\", \"13\", \".57\", \"1.96\", \"660\"])\n",
|
||||
"ByteRecord([\"3\", \"13.48\", \"1.67\", \"2.64\", \"22.5\", \"89\", \"2.6\", \"1.1\", \".52\", \"2.29\", \"11.75\", \".57\", \"1.78\", \"620\"])\n",
|
||||
"ByteRecord([\"3\", \"12.36\", \"3.83\", \"2.38\", \"21\", \"88\", \"2.3\", \".92\", \".5\", \"1.04\", \"7.65\", \".56\", \"1.58\", \"520\"])\n",
|
||||
"ByteRecord([\"3\", \"13.69\", \"3.26\", \"2.54\", \"20\", \"107\", \"1.83\", \".56\", \".5\", \".8\", \"5.88\", \".96\", \"1.82\", \"680\"])\n",
|
||||
"ByteRecord([\"3\", \"12.85\", \"3.27\", \"2.58\", \"22\", \"106\", \"1.65\", \".6\", \".6\", \".96\", \"5.58\", \".87\", \"2.11\", \"570\"])\n",
|
||||
"ByteRecord([\"3\", \"12.96\", \"3.45\", \"2.35\", \"18.5\", \"106\", \"1.39\", \".7\", \".4\", \".94\", \"5.28\", \".68\", \"1.75\", \"675\"])\n",
|
||||
"ByteRecord([\"3\", \"13.78\", \"2.76\", \"2.3\", \"22\", \"90\", \"1.35\", \".68\", \".41\", \"1.03\", \"9.58\", \".7\", \"1.68\", \"615\"])\n",
|
||||
"ByteRecord([\"3\", \"13.73\", \"4.36\", \"2.26\", \"22.5\", \"88\", \"1.28\", \".47\", \".52\", \"1.15\", \"6.62\", \".78\", \"1.75\", \"520\"])\n",
|
||||
"ByteRecord([\"3\", \"13.45\", \"3.7\", \"2.6\", \"23\", \"111\", \"1.7\", \".92\", \".43\", \"1.46\", \"10.68\", \".85\", \"1.56\", \"695\"])\n",
|
||||
"ByteRecord([\"3\", \"12.82\", \"3.37\", \"2.3\", \"19.5\", \"88\", \"1.48\", \".66\", \".4\", \".97\", \"10.26\", \".72\", \"1.75\", \"685\"])\n",
|
||||
"ByteRecord([\"3\", \"13.58\", \"2.58\", \"2.69\", \"24.5\", \"105\", \"1.55\", \".84\", \".39\", \"1.54\", \"8.66\", \".74\", \"1.8\", \"750\"])\n",
|
||||
"ByteRecord([\"3\", \"13.4\", \"4.6\", \"2.86\", \"25\", \"112\", \"1.98\", \".96\", \".27\", \"1.11\", \"8.5\", \".67\", \"1.92\", \"630\"])\n",
|
||||
"ByteRecord([\"3\", \"12.2\", \"3.03\", \"2.32\", \"19\", \"96\", \"1.25\", \".49\", \".4\", \".73\", \"5.5\", \".66\", \"1.83\", \"510\"])\n",
|
||||
"ByteRecord([\"3\", \"12.77\", \"2.39\", \"2.28\", \"19.5\", \"86\", \"1.39\", \".51\", \".48\", \".64\", \"9.899999\", \".57\", \"1.63\", \"470\"])\n",
|
||||
"ByteRecord([\"3\", \"14.16\", \"2.51\", \"2.48\", \"20\", \"91\", \"1.68\", \".7\", \".44\", \"1.24\", \"9.7\", \".62\", \"1.71\", \"660\"])\n",
|
||||
"ByteRecord([\"3\", \"13.71\", \"5.65\", \"2.45\", \"20.5\", \"95\", \"1.68\", \".61\", \".52\", \"1.06\", \"7.7\", \".64\", \"1.74\", \"740\"])\n",
|
||||
"ByteRecord([\"3\", \"13.4\", \"3.91\", \"2.48\", \"23\", \"102\", \"1.8\", \".75\", \".43\", \"1.41\", \"7.3\", \".7\", \"1.56\", \"750\"])\n",
|
||||
"ByteRecord([\"3\", \"13.27\", \"4.28\", \"2.26\", \"20\", \"120\", \"1.59\", \".69\", \".43\", \"1.35\", \"10.2\", \".59\", \"1.56\", \"835\"])\n",
|
||||
"ByteRecord([\"3\", \"13.17\", \"2.59\", \"2.37\", \"20\", \"120\", \"1.65\", \".68\", \".53\", \"1.46\", \"9.3\", \".6\", \"1.62\", \"840\"])\n",
|
||||
"ByteRecord([\"3\", \"14.13\", \"4.1\", \"2.74\", \"24.5\", \"96\", \"2.05\", \".76\", \".56\", \"1.35\", \"9.2\", \".61\", \"1.6\", \"560\"])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"()"
|
||||
]
|
||||
},
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"type Record = HashMap<String,String>;\n",
|
||||
"let file = File::open(\"../wine.csv\")?;\n",
|
||||
"let mut r = csv::Reader::from_reader(file);\n",
|
||||
"for result in r.byte_records() {\n",
|
||||
" let record = result?;\n",
|
||||
" println!(\"{:?}\", record)\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "45341565-f0f7-4bb7-a8e4-3623a9eded55",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Reader { core: Reader { dfa: Dfa(N/A), dfa_state: DfaState(0), nfa_state: StartRecord, delimiter: 44, term: CRLF, quote: 34, escape: None, double_quote: true, comment: None, quoting: true, use_nfa: false, line: 1, has_read: false, output_pos: 0 }, rdr: BufReader { reader: File { fd: 4, path: \"/home/jovyan/csvgr/wine.csv\", read: true, write: false }, buffer: 0/8192 }, state: ReaderState { headers: None, has_headers: true, flexible: false, trim: None, first_field_count: None, cur_pos: Position { byte: 0, line: 1, record: 0 }, first: false, seeked: false, eof: NotEof } }"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "01a3cd51-67a6-49b6-8234-726fe4b94b84",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Rust",
|
||||
"language": "rust",
|
||||
"name": "rust"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": "rust",
|
||||
"file_extension": ".rs",
|
||||
"mimetype": "text/rust",
|
||||
"name": "Rust",
|
||||
"pygment_lexer": "rust",
|
||||
"version": ""
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
7
python/group.py
Executable file
7
python/group.py
Executable file
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
# Read a CSV table from stdin and print the mean Weekly_Sales for each Dept.

import sys

import pandas as pd

# sort=False keeps departments in first-seen order; as_index=False keeps
# "Dept" as a regular column in the printed table.
sales = pd.read_csv(sys.stdin)
by_dept = sales.groupby("Dept", sort=False, as_index=False)
print(by_dept["Weekly_Sales"].mean())
|
8
queries/weekly_sales_by_dept.sql
Normal file
8
queries/weekly_sales_by_dept.sql
Normal file
|
@ -0,0 +1,8 @@
|
|||
-- Average of Weekly_Sales for each Dept.
-- NOTE(review): `this` is presumably the name under which the input table is
-- registered by the tool running the query — confirm against the caller.
select
|
||||
Dept,
|
||||
avg(Weekly_Sales)
|
||||
from
|
||||
this
|
||||
group by
|
||||
Dept
|
||||
|
123
src/commands.rs
123
src/commands.rs
|
@ -1,123 +0,0 @@
|
|||
use clap::{arg, ArgAction, Command};
|
||||
|
||||
// Generate command line options for the csv command
|
||||
pub fn gen_csv_command() -> Command {
|
||||
Command::new("csv")
|
||||
.about("Read csv, output arrow stream")
|
||||
.arg(arg!([path] "Path to CSV file"))
|
||||
.arg(arg!(-d --delimiter <String> "Column delimiter. Assume ,").required(false))
|
||||
.arg(
|
||||
arg!(-i --stdin ... "Read from stdin")
|
||||
.required(false)
|
||||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(arg!(-q --query <String> "Execute query on the file").required(false))
|
||||
.arg(
|
||||
arg!(-s --summary ... "Summarize the data")
|
||||
.required(false)
|
||||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(
|
||||
arg!(-t --text ... "Output text instead of binary")
|
||||
.required(false)
|
||||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(arg!(-P --parquet <String> "Write output as a parquet file").required(false))
|
||||
.arg(
|
||||
arg!(-a --head ... "Print the header of the table")
|
||||
.required(false)
|
||||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
}
|
||||
|
||||
// Generate command line options for the schema command
|
||||
pub fn gen_schema_command() -> Command {
|
||||
Command::new("schema")
|
||||
.about("Several table schema related utilities")
|
||||
.arg(
|
||||
arg!(-i --stdin ... "Read from stdin")
|
||||
.required(false)
|
||||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(arg!(-d --delimiter <String> "Column delimiter. Assume ,").required(false))
|
||||
.arg(arg!(-n --name <String> "Table name").required(false))
|
||||
.arg(arg!(-l --strlen <String> "Default length for string columns").required(false))
|
||||
.arg(
|
||||
arg!(-s --summary ... "Summarize the schema")
|
||||
.required(false)
|
||||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(
|
||||
arg!(-p --postgresql ... "Create a postgresql table with schema")
|
||||
.required(false)
|
||||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
}
|
||||
|
||||
// Generate command line options for the sql command
|
||||
pub fn gen_sql_command() -> Command {
|
||||
Command::new("sql")
|
||||
.about("Runs a sql statement on the file")
|
||||
.arg(arg!(-d --delimiter <String> "Column delimiter. Assume ,").required(false))
|
||||
.arg(arg!([statement] "SQL statement"))
|
||||
.arg(
|
||||
arg!(-t --text ... "Input text instead of binary")
|
||||
.required(false)
|
||||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(arg!(-d --delimiter <String> "Column delimiter").required(false))
|
||||
}
|
||||
|
||||
// Generate command line options for the rpq command
|
||||
pub fn gen_rpq_command() -> Command {
|
||||
Command::new("rpq")
|
||||
.about("Read parquet file")
|
||||
.arg(arg!([path] "Path to the parquet file"))
|
||||
.arg(arg!(-q --query <String> "Execute query on the file").required(false))
|
||||
.arg(
|
||||
arg!(-s --summary ... "Summarize the data")
|
||||
.required(false)
|
||||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(
|
||||
arg!(-i --stdin ... "Read from stdin instead than from a file")
|
||||
.required(false)
|
||||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(
|
||||
arg!(-t --text ... "Output text instead of binary")
|
||||
.required(false)
|
||||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(arg!(-P --parquet <String> "Write the result as a parquet file").required(false))
|
||||
.arg(
|
||||
arg!(-a --head ... "Print the header of the table")
|
||||
.required(false)
|
||||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
}
|
||||
|
||||
// Generate command line options for the wpq command
|
||||
pub fn gen_wpq_command() -> Command {
|
||||
Command::new("wpq")
|
||||
.about("Write to a paquet file")
|
||||
.arg(arg!(-d --delimiter <String> "Column delimiter. Assume ,").required(false))
|
||||
.arg(
|
||||
arg!(-t --text ... "Input text instead of binary")
|
||||
.required(false)
|
||||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
.arg(arg!([path] "Path to the new parquet file"))
|
||||
}
|
||||
|
||||
// Generate command line options for the print command
|
||||
pub fn gen_print_command() -> Command {
|
||||
Command::new("print")
|
||||
.about("Pretty prints the table")
|
||||
.arg(arg!(-d --delimiter <String> "Column delimiter. Assume ,").required(false))
|
||||
.arg(
|
||||
arg!(-t --text ... "Inputs csv instead of binary")
|
||||
.required(false)
|
||||
.action(ArgAction::SetTrue),
|
||||
)
|
||||
}
|
150
src/handlers.rs
150
src/handlers.rs
|
@ -1,150 +0,0 @@
|
|||
use crate::io;
|
||||
use crate::schema;
|
||||
use crate::sql;
|
||||
use clap::ArgMatches;
|
||||
use polars_lazy::prelude::LazyFrame;
|
||||
|
||||
// Handle csv command
|
||||
pub fn handle_csv(matches: &ArgMatches) {
|
||||
let delimiter = match matches.get_one::<String>("delimiter") {
|
||||
Some(delimiter) => delimiter.as_bytes()[0],
|
||||
None => b',',
|
||||
};
|
||||
let mut ldf = if matches.get_flag("stdin") {
|
||||
io::load_csv_from_stdin(delimiter)
|
||||
} else {
|
||||
let path = matches
|
||||
.get_one::<String>("path")
|
||||
.expect("Please, provide a file");
|
||||
io::read_csv(path.to_string(), delimiter)
|
||||
};
|
||||
if let Some(query) = matches.get_one::<String>("query") {
|
||||
ldf = sql::execute(ldf, query);
|
||||
}
|
||||
if matches.get_flag("summary") {
|
||||
let df = ldf.collect().expect("Could not collect");
|
||||
println!("{:?}", df.describe(None));
|
||||
} else if matches.get_flag("head") {
|
||||
let df = ldf.fetch(5).expect("Could not fetch");
|
||||
println!("{}", df)
|
||||
} else {
|
||||
if matches.get_flag("text") {
|
||||
io::dump_csv_to_stdout(ldf);
|
||||
} else {
|
||||
if let Some(path) = matches.get_one::<String>("parquet") {
|
||||
io::write_parquet(ldf, path.to_string());
|
||||
} else {
|
||||
io::write_ipc(ldf);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle the SQL command
|
||||
pub fn handle_sql(matches: &ArgMatches) {
|
||||
let delimiter = match matches.get_one::<String>("delimiter") {
|
||||
Some(delimiter) => delimiter.as_bytes()[0],
|
||||
None => b',',
|
||||
};
|
||||
if let Some(statement) = matches.get_one::<String>("statement") {
|
||||
let ldf = if matches.get_flag("text") {
|
||||
io::load_csv_from_stdin(delimiter)
|
||||
} else {
|
||||
io::read_ipc()
|
||||
};
|
||||
let res = sql::execute(ldf, statement);
|
||||
io::write_ipc(res);
|
||||
} else {
|
||||
io::write_ipc(io::read_ipc());
|
||||
}
|
||||
}
|
||||
|
||||
// Handle the print command
|
||||
pub fn handle_print(matches: &ArgMatches) {
|
||||
let delimiter = match matches.get_one::<String>("delimiter") {
|
||||
Some(delimiter) => delimiter.as_bytes()[0],
|
||||
None => b',',
|
||||
};
|
||||
let df = if matches.get_flag("text") {
|
||||
io::load_csv_from_stdin(delimiter)
|
||||
} else {
|
||||
io::read_ipc()
|
||||
};
|
||||
println!("{}", df.collect().expect("Could not collect"));
|
||||
}
|
||||
|
||||
// Handle the rpq command
|
||||
pub fn handle_rpq(matches: &ArgMatches) {
|
||||
let mut ldf = LazyFrame::default();
|
||||
if matches.get_flag("stdin") {
|
||||
ldf = io::load_parquet_from_stdin();
|
||||
} else if let Some(path) = matches.get_one::<String>("path") {
|
||||
ldf = io::read_parquet(path.to_string());
|
||||
} else {
|
||||
eprintln!("File not found or not reading from stdin")
|
||||
}
|
||||
if let Some(query) = matches.get_one::<String>("query") {
|
||||
ldf = sql::execute(ldf, query);
|
||||
}
|
||||
if matches.get_flag("summary") {
|
||||
let df = ldf.collect().expect("Could not collect");
|
||||
println!("{:?}", df.describe(None));
|
||||
} else if matches.get_flag("head") {
|
||||
let df = ldf.fetch(5).expect("Could not fetch");
|
||||
println!("{}", df)
|
||||
} else {
|
||||
if matches.get_flag("text") {
|
||||
io::dump_csv_to_stdout(ldf);
|
||||
} else {
|
||||
if let Some(path) = matches.get_one::<String>("parquet") {
|
||||
io::write_parquet(ldf, path.to_string());
|
||||
} else {
|
||||
io::write_ipc(ldf);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle the wpq command
|
||||
pub fn handle_wpq(matches: &ArgMatches) {
|
||||
let delimiter = match matches.get_one::<String>("delimiter") {
|
||||
Some(delimiter) => delimiter.as_bytes()[0],
|
||||
None => b',',
|
||||
};
|
||||
if let Some(path) = matches.get_one::<String>("path") {
|
||||
let ldf = if matches.get_flag("text") {
|
||||
io::load_csv_from_stdin(delimiter)
|
||||
} else {
|
||||
io::read_ipc()
|
||||
};
|
||||
io::write_parquet(ldf, path.to_string());
|
||||
} else {
|
||||
eprintln!("Could now write to parquet");
|
||||
}
|
||||
}
|
||||
|
||||
// Handle the schema command
|
||||
pub fn handle_schema(matches: &ArgMatches) {
|
||||
let delimiter = match matches.get_one::<String>("delimiter") {
|
||||
Some(delimiter) => delimiter.as_bytes()[0],
|
||||
None => b',',
|
||||
};
|
||||
let ldf = if matches.get_flag("stdin") {
|
||||
io::load_csv_from_stdin(delimiter)
|
||||
} else {
|
||||
io::read_ipc()
|
||||
};
|
||||
|
||||
if matches.get_flag("summary") {
|
||||
schema::print_schema(ldf);
|
||||
} else if matches.get_flag("postgresql") {
|
||||
let name = matches
|
||||
.get_one::<String>("name")
|
||||
.expect("Please provide a table name");
|
||||
let strlen: u32 = match matches.get_one::<String>("strlen") {
|
||||
Some(strlen) => strlen.parse::<u32>().unwrap(),
|
||||
None => 128,
|
||||
};
|
||||
schema::print_create(ldf, name.as_str(), strlen);
|
||||
}
|
||||
}
|
133
src/io.rs
133
src/io.rs
|
@ -1,105 +1,68 @@
|
|||
use polars_io::prelude::*;
|
||||
use polars_lazy::prelude::*;
|
||||
use polars::frame::DataFrame;
|
||||
use polars::prelude::*;
|
||||
use std::fs;
|
||||
use std::io;
|
||||
use std::io::Read;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Read CSV file
|
||||
pub fn read_csv(path: String, delimiter: u8) -> LazyFrame {
|
||||
LazyCsvReader::new(path)
|
||||
.with_delimiter(delimiter)
|
||||
.with_infer_schema_length(None)
|
||||
.finish()
|
||||
.expect("Could not load file")
|
||||
}
|
||||
|
||||
/// Read parquet and return a Polars LazyFrame
|
||||
pub fn read_parquet(path: String) -> LazyFrame {
|
||||
LazyFrame::scan_parquet(path, ScanArgsParquet::default()).expect("Could not read parquet file")
|
||||
}
|
||||
|
||||
/// Read IPC setream
|
||||
pub fn read_ipc() -> LazyFrame {
|
||||
let mut buffer = Vec::new();
|
||||
let _res: () = match io::stdin().lock().read_to_end(&mut buffer) {
|
||||
Ok(_ok) => (),
|
||||
Err(_e) => (),
|
||||
};
|
||||
let cursor = io::Cursor::new(buffer);
|
||||
match IpcStreamReader::new(cursor).finish() {
|
||||
Ok(df) => df.lazy(),
|
||||
Err(_e) => LazyFrame::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Read CSV format from stdin and return a Polars DataFrame
|
||||
pub fn load_csv_from_stdin(delimiter: u8) -> LazyFrame {
|
||||
let mut buffer = Vec::new();
|
||||
let _res: () = match io::stdin().lock().read_to_end(&mut buffer) {
|
||||
pub fn load_csv_from_stdin() -> DataFrame {
|
||||
let mut buffer = String::new();
|
||||
let _res: () = match io::stdin().read_to_string(&mut buffer) {
|
||||
Ok(_ok) => (),
|
||||
Err(_e) => (),
|
||||
};
|
||||
let cursor = io::Cursor::new(buffer);
|
||||
match CsvReader::new(cursor).with_delimiter(delimiter).finish() {
|
||||
Ok(df) => df.lazy(),
|
||||
Err(_e) => LazyFrame::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Read Parquet format from stdin and return a Polars LazyFrame
|
||||
pub fn load_parquet_from_stdin() -> LazyFrame {
|
||||
let mut buffer = Vec::new();
|
||||
let _res: () = match io::stdin().lock().read_to_end(&mut buffer) {
|
||||
Ok(_ok) => (),
|
||||
Err(_e) => (),
|
||||
let cursor = io::Cursor::new(buffer.as_bytes());
|
||||
let df = match CsvReader::new(cursor).finish() {
|
||||
Ok(df) => df,
|
||||
Err(_e) => DataFrame::default(),
|
||||
};
|
||||
let cursor = io::Cursor::new(buffer);
|
||||
match ParquetReader::new(cursor).finish() {
|
||||
Ok(df) => df.lazy(),
|
||||
Err(_e) => LazyFrame::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Write to IPC stream
|
||||
pub fn write_ipc(df: LazyFrame) {
|
||||
IpcStreamWriter::new(io::stdout().lock())
|
||||
.finish(&mut df.collect().expect("Could not collect dataframe"))
|
||||
.expect("Could not write to stream");
|
||||
df
|
||||
}
|
||||
|
||||
/// Take a Polars Dataframe and write it as CSV to stdout
|
||||
pub fn dump_csv_to_stdout(ldf: LazyFrame) {
|
||||
let _res: () = match CsvWriter::new(io::stdout().lock())
|
||||
.finish(&mut ldf.collect().expect("Could not collect"))
|
||||
{
|
||||
pub fn dump_csv_to_stdout(df: &mut DataFrame) {
|
||||
let _res: () = match CsvWriter::new(io::stdout().lock()).finish(df) {
|
||||
Ok(_ok) => (),
|
||||
Err(_e) => (),
|
||||
};
|
||||
}
|
||||
|
||||
/// Write a Polars DataFrame to Parquet
|
||||
/// Not yet supported in standard executor
|
||||
pub fn sink_parquet(ldf: LazyFrame, path: String) {
|
||||
// Selected compression not implemented yet
|
||||
let mut p = PathBuf::new();
|
||||
p.push(path);
|
||||
ldf.sink_parquet(
|
||||
p,
|
||||
ParquetWriteOptions {
|
||||
compression: ParquetCompression::Snappy,
|
||||
statistics: true,
|
||||
row_group_size: None,
|
||||
data_pagesize_limit: None,
|
||||
maintain_order: false,
|
||||
},
|
||||
)
|
||||
.expect("Could not save");
|
||||
/// Read parquet and return a Polars DataFrame
|
||||
pub fn read_parquet(path: String) -> DataFrame {
|
||||
let file = fs::File::open(path).expect("Could not open file");
|
||||
let df = match ParquetReader::new(file).finish() {
|
||||
Ok(df) => df,
|
||||
Err(e) => {
|
||||
eprintln!("{e}");
|
||||
DataFrame::default()
|
||||
}
|
||||
};
|
||||
df
|
||||
}
|
||||
|
||||
pub fn write_parquet(ldf: LazyFrame, path: String) {
|
||||
/// Write a Polars DataFrame to Parquet
|
||||
pub fn write_parquet(
|
||||
mut df: DataFrame,
|
||||
path: String,
|
||||
compression: String,
|
||||
statistics: bool,
|
||||
chunksize: Option<usize>,
|
||||
) {
|
||||
// Selected compression not implemented yet
|
||||
let mut file = std::fs::File::create(path).unwrap();
|
||||
ParquetWriter::new(&mut file)
|
||||
.finish(&mut ldf.collect().expect("Could not collect"))
|
||||
.unwrap();
|
||||
let mut _file = match fs::File::create(path) {
|
||||
Ok(mut file) => {
|
||||
let mut w = ParquetWriter::new(&mut file);
|
||||
if statistics {
|
||||
w = w.with_statistics(statistics);
|
||||
}
|
||||
if chunksize.unwrap_or(0) > 0 {
|
||||
w = w.with_row_group_size(chunksize);
|
||||
}
|
||||
let _r = match w.finish(&mut df) {
|
||||
Ok(_r) => (),
|
||||
Err(e) => eprintln!("{e}"),
|
||||
};
|
||||
}
|
||||
Err(e) => eprintln!("{e}"),
|
||||
};
|
||||
}
|
||||
|
|
79
src/main.rs
79
src/main.rs
|
@ -1,44 +1,57 @@
|
|||
mod commands;
|
||||
mod handlers;
|
||||
mod io;
|
||||
mod schema;
|
||||
mod sql;
|
||||
use clap::command;
|
||||
use clap::{arg, command, Command};
|
||||
|
||||
fn main() {
|
||||
// Commands definition
|
||||
let matches = command!()
|
||||
.author("Guillem Borrell")
|
||||
.version(env!("CARGO_PKG_VERSION"))
|
||||
.about("dr is a handy command line tool to handle csv an parquet files")
|
||||
.long_about(
|
||||
"dr is a handy command line tool to handle csv and parquet files.
|
||||
It is designed to integrate nicely with other command line tools
|
||||
like cat, sed, awk and database clients cli. You can find more
|
||||
information an a short tutorial https://git.guillemborrell.es/guillem/dr
|
||||
",
|
||||
.subcommand(
|
||||
Command::new("sql")
|
||||
.about("Runs a sql statement on the file")
|
||||
.arg(arg!([statement] "SQL statement"))
|
||||
.arg(arg!(-d --delimiter <String> "Column delimiter").required(false)),
|
||||
)
|
||||
.subcommand(Command::new("print").about("Pretty prints the table"))
|
||||
.subcommand(
|
||||
Command::new("rpq")
|
||||
.about("Read parquet file")
|
||||
.arg(arg!([path] "Path to the parquet file")),
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("wpq")
|
||||
.about("Write to a paquet file")
|
||||
.arg(arg!([path] "Path to the new parquet file")),
|
||||
)
|
||||
.subcommand(commands::gen_csv_command())
|
||||
.subcommand(commands::gen_schema_command())
|
||||
.subcommand(commands::gen_sql_command())
|
||||
.subcommand(commands::gen_print_command())
|
||||
.subcommand(commands::gen_rpq_command())
|
||||
.subcommand(commands::gen_wpq_command())
|
||||
.get_matches();
|
||||
|
||||
// Send the flow to the corresponding handler
|
||||
if let Some(sub_matches) = matches.subcommand_matches("csv") {
|
||||
handlers::handle_csv(sub_matches);
|
||||
} else if let Some(sub_matches) = matches.subcommand_matches("sql") {
|
||||
handlers::handle_sql(sub_matches);
|
||||
} else if let Some(sub_matches) = matches.subcommand_matches("print") {
|
||||
handlers::handle_print(sub_matches);
|
||||
} else if let Some(sub_matches) = matches.subcommand_matches("rpq") {
|
||||
handlers::handle_rpq(sub_matches);
|
||||
} else if let Some(sub_matches) = matches.subcommand_matches("wpq") {
|
||||
handlers::handle_wpq(sub_matches);
|
||||
} else if let Some(sub_matches) = matches.subcommand_matches("schema") {
|
||||
handlers::handle_schema(sub_matches);
|
||||
if let Some(matches) = matches.subcommand_matches("sql") {
|
||||
//if let Some(delimiter) = matches.get_one::<String>("delimiter") {
|
||||
// println!("DEBUG: Delimiter: {delimiter}")
|
||||
//} else {
|
||||
// println!("DEBUG: No delimiter")
|
||||
//}
|
||||
if let Some(statement) = matches.get_one::<String>("statement") {
|
||||
sql::execute(statement);
|
||||
} else {
|
||||
let mut df = io::load_csv_from_stdin();
|
||||
io::dump_csv_to_stdout(&mut df);
|
||||
}
|
||||
} else if let Some(_matches) = matches.subcommand_matches("print") {
|
||||
let df = io::load_csv_from_stdin();
|
||||
println!("{}", df)
|
||||
} else if let Some(matches) = matches.subcommand_matches("rpq") {
|
||||
if let Some(path) = matches.get_one::<String>("path") {
|
||||
let mut df = io::read_parquet(path.to_string());
|
||||
io::dump_csv_to_stdout(&mut df);
|
||||
} else {
|
||||
eprintln!("File not found")
|
||||
}
|
||||
} else if let Some(matches) = matches.subcommand_matches("wpq") {
|
||||
if let Some(path) = matches.get_one::<String>("path") {
|
||||
let df = io::load_csv_from_stdin();
|
||||
io::write_parquet(df, path.to_string(), "lz4raw".to_string(), true, Some(0));
|
||||
} else {
|
||||
eprintln!("Could now write to parquet");
|
||||
}
|
||||
} else {
|
||||
println!("No command provided. Please execute dr --help")
|
||||
}
|
||||
|
|
|
@ -1,58 +0,0 @@
|
|||
use polars_lazy::prelude::*;
|
||||
use sea_query::table::ColumnType;
|
||||
use sea_query::*;
|
||||
|
||||
pub fn print_schema(ldf: LazyFrame) {
|
||||
let schema = ldf.schema().expect("Could not retreive schema");
|
||||
for f in schema.iter_fields() {
|
||||
let mut unnamed_cols_counter = 0;
|
||||
let d = f.data_type().to_string();
|
||||
let n = if f.name.is_empty() {
|
||||
unnamed_cols_counter += 1;
|
||||
format!("Column{}", unnamed_cols_counter)
|
||||
} else {
|
||||
f.name
|
||||
};
|
||||
|
||||
println!("{n} ({d})");
|
||||
}
|
||||
}
|
||||
|
||||
pub fn print_create(ldf: LazyFrame, table_name: &str, default_strlen: u32) {
|
||||
let schema = ldf.schema().expect("Could not retreive schema");
|
||||
// Create empty table
|
||||
let mut statements = vec![Table::create()
|
||||
.table(Alias::new(table_name))
|
||||
.if_not_exists()
|
||||
.to_string(PostgresQueryBuilder)];
|
||||
|
||||
// Alter table adding fields one by one
|
||||
let mut unnamed_cols_counter = 0;
|
||||
for f in schema.iter_fields() {
|
||||
let dtype = match f.data_type().to_string().as_str() {
|
||||
"i64" => ColumnType::Integer,
|
||||
"f64" => ColumnType::Float,
|
||||
"str" => ColumnType::String(Some(default_strlen)),
|
||||
"bool" => ColumnType::Boolean,
|
||||
&_ => todo!("Datatype {} not supported", f.data_type().to_string()),
|
||||
};
|
||||
|
||||
let name = if f.name.is_empty() {
|
||||
unnamed_cols_counter += 1;
|
||||
format!("Column{}", unnamed_cols_counter)
|
||||
} else {
|
||||
f.name
|
||||
};
|
||||
|
||||
let table = Table::alter()
|
||||
.table(Alias::new(table_name))
|
||||
.add_column(&mut ColumnDef::new_with_type(Alias::new(&name), dtype))
|
||||
.to_owned();
|
||||
statements.push(table.to_string(PostgresQueryBuilder));
|
||||
}
|
||||
|
||||
// Finallyls print all statements
|
||||
for statement in statements {
|
||||
println!("{};", statement);
|
||||
}
|
||||
}
|
23
src/sql.rs
23
src/sql.rs
|
@ -1,10 +1,19 @@
|
|||
use crate::io::dump_csv_to_stdout;
|
||||
use crate::io::load_csv_from_stdin;
|
||||
use polars_lazy::frame::IntoLazy;
|
||||
use polars_sql::SQLContext;
|
||||
use polars_lazy::prelude::LazyFrame;
|
||||
|
||||
pub fn execute(ldf: LazyFrame, statement: &String) -> LazyFrame {
|
||||
let mut context = SQLContext::try_new().expect("Could not create context");
|
||||
context.register("this", ldf);
|
||||
context
|
||||
.execute(statement)
|
||||
.expect("Could not execute statement")
|
||||
pub fn execute(statement: &String) {
|
||||
if let Ok(mut context) = SQLContext::try_new() {
|
||||
let df = load_csv_from_stdin();
|
||||
context.register("this", df.lazy());
|
||||
if let Ok(res) = context.execute(statement) {
|
||||
if let Ok(mut res) = res.collect() {
|
||||
dump_csv_to_stdout(&mut res);
|
||||
};
|
||||
};
|
||||
if let Err(e) = context.execute(statement) {
|
||||
eprintln!("Query execution error {e}")
|
||||
};
|
||||
};
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue