From e8c7600ed7caf0770c6d9b2a7c4506b3b925eff8 Mon Sep 17 00:00:00 2001
From: Guillem Borrell
Date: Sun, 16 Jun 2024 08:56:45 +0200
Subject: [PATCH] Better tests and docs
---
requirements.in | 4 ++-
src/hellocomputer/db.py | 29 +++++++++++------
src/hellocomputer/routers/analysis.py | 6 ++--
src/hellocomputer/sessions.py | 2 +-
src/hellocomputer/static/about.html | 2 +-
src/hellocomputer/static/index.html | 6 ++--
src/hellocomputer/static/script.js | 30 ++++++++++++------
.../templates/TestExcelHelloComputer.xlsx | Bin 0 -> 10937 bytes
src/hellocomputer/users.py | 4 +--
test/test_query.py | 24 +++++++++++++-
10 files changed, 78 insertions(+), 29 deletions(-)
create mode 100644 src/hellocomputer/static/templates/TestExcelHelloComputer.xlsx
diff --git a/requirements.in b/requirements.in
index 640ff7d..9521c80 100644
--- a/requirements.in
+++ b/requirements.in
@@ -6,9 +6,11 @@ pydantic-settings
s3fs
aiofiles
duckdb
+duckdb-engine
polars
pyarrow
pyjwt[crypto]
python-multipart
authlib
-itsdangerous
\ No newline at end of file
+itsdangerous
+sqlalchemy
\ No newline at end of file
diff --git a/src/hellocomputer/db.py b/src/hellocomputer/db.py
index 3630616..528ac00 100644
--- a/src/hellocomputer/db.py
+++ b/src/hellocomputer/db.py
@@ -1,8 +1,7 @@
from enum import StrEnum
+from sqlalchemy import create_engine, text
from pathlib import Path
-import duckdb
-
class StorageEngines(StrEnum):
local = "Local"
@@ -19,11 +18,13 @@ class DDB:
bucket: str | None = None,
**kwargs,
):
- self.db = duckdb.connect()
- self.db.install_extension("spatial")
- self.db.install_extension("httpfs")
- self.db.load_extension("spatial")
- self.db.load_extension("httpfs")
+ self.engine = create_engine(
+ "duckdb:///:memory:",
+ connect_args={
+ "preload_extensions": ["httpfs", "spatial"],  # same extensions the removed duckdb.connect() setup loaded; "https" is not a DuckDB extension
+ "config": {"memory_limit": "300mb"},
+ },
+ )
self.sheets = tuple()
self.loaded = False
@@ -35,12 +36,18 @@ class DDB:
bucket is not None,
)
):
- self.db.sql(f"""
+ with self.engine.connect() as conn:
+ conn.execute(
+ text(
+ f"""
CREATE SECRET (
TYPE GCS,
KEY_ID '{gcs_access}',
SECRET '{gcs_secret}')
- """)
+ """
+ )
+ )
+
self.path_prefix = f"gcs://{bucket}"
else:
raise ValueError(
@@ -55,3 +62,7 @@ class DDB:
raise ValueError(
"With local storage you need to provide the path keyword argument"
)
+
+ @property
+ def db(self):
+ return self.engine.raw_connection()  # DBAPI-level connection obtained from the engine; stands in for the removed self.db attribute
diff --git a/src/hellocomputer/routers/analysis.py b/src/hellocomputer/routers/analysis.py
index 13e9331..ab39b98 100644
--- a/src/hellocomputer/routers/analysis.py
+++ b/src/hellocomputer/routers/analysis.py
@@ -1,6 +1,7 @@
from fastapi import APIRouter
from fastapi.responses import PlainTextResponse
+
from hellocomputer.db import StorageEngines
from hellocomputer.extraction import extract_code_block
from hellocomputer.sessions import SessionDB
@@ -13,7 +14,7 @@ router = APIRouter()
@router.get("/query", response_class=PlainTextResponse, tags=["queries"])
async def query(sid: str = "", q: str = "") -> str:
- chat = Chat(api_key=settings.anyscale_api_key, temperature=0.5)
+ llm = Chat(api_key=settings.anyscale_api_key, temperature=0.5)
db = SessionDB(
StorageEngines.gcs,
gcs_access=settings.gcs_access,
@@ -22,9 +23,8 @@ async def query(sid: str = "", q: str = "") -> str:
sid=sid,
).load_folder()
- chat = await chat.eval("You're an expert sql developer", db.query_prompt(q))
+ chat = await llm.eval("You're a DUCKDB expert", db.query_prompt(q))
query = extract_code_block(chat.last_response_content())
result = str(db.query(query))
- print(result)
return result
diff --git a/src/hellocomputer/sessions.py b/src/hellocomputer/sessions.py
index bb1c0d9..dd96104 100644
--- a/src/hellocomputer/sessions.py
+++ b/src/hellocomputer/sessions.py
@@ -149,7 +149,7 @@ class SessionDB(DDB):
)
@property
- def schema(self):
+ def schema(self) -> str:
return os.linesep.join(
[
"The schema of the database is the following:",
diff --git a/src/hellocomputer/static/about.html b/src/hellocomputer/static/about.html
index 04ef787..ae7ccdf 100644
--- a/src/hellocomputer/static/about.html
+++ b/src/hellocomputer/static/about.html
@@ -37,7 +37,7 @@
Hola, computer! is a web assistant that allows you to query excel files using natural language. It may
not be as powerful as Excel, but it has an efficient query backend that can process your data faster
- and more efficiently than Excel.
+ than Excel.