hellocomputer/test/test_query.py

from pathlib import Path

import hellocomputer
import polars as pl
import pytest
from hellocomputer.config import Settings, StorageEngines
from hellocomputer.db.sessions import SessionDB
from hellocomputer.models import AvailableModels
from hellocomputer.prompts import Prompts
from hellocomputer.extraction import initial_intent_parser
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain_openai import ChatOpenAI

settings = Settings(
    storage_engine=StorageEngines.local,
    path=Path(hellocomputer.__file__).parents[2] / "test" / "output",
)

TEST_XLS_PATH = (
    Path(hellocomputer.__file__).parents[2]
    / "test"
    / "data"
    / "TestExcelHelloComputer.xlsx"
)

SID = "test"


@pytest.mark.asyncio
@pytest.mark.skipif(settings.llm_api_key == "Awesome API", reason="API Key not set")
async def test_chat_simple():
    llm = ChatOpenAI(
        base_url=settings.llm_base_url,
        api_key=settings.llm_api_key,
        model=AvailableModels.mixtral_8x7b,
        temperature=0.5,
    )
    prompt = ChatPromptTemplate.from_template(
        """Say literally {word}, a single word. Don't be verbose, 
        I'll be disappointed if you say more than a single word"""
    )
    chain = prompt | llm
    response = await chain.ainvoke({"word": "Hello"})

    assert "hello" in response.content.lower()


@pytest.mark.asyncio
@pytest.mark.skipif(settings.llm_api_key == "Awesome API", reason="API Key not set")
async def test_query_context():
    db = SessionDB(settings, sid=SID).load_xls(TEST_XLS_PATH).llmsql

    llm = ChatOpenAI(
        base_url=settings.llm_base_url,
        api_key=settings.llm_api_key,
        model=AvailableModels.mixtral_8x7b,
        temperature=0.5,
    )

    toolkit = SQLDatabaseToolkit(db=db, llm=llm)
    context = toolkit.get_context()
    assert "table_info" in context
    assert "table_names" in context


@pytest.mark.asyncio
@pytest.mark.skipif(settings.llm_api_key == "Awesome API", reason="API Key not set")
async def test_initial_intent():
    llm = ChatOpenAI(
        base_url=settings.llm_base_url,
        api_key=settings.llm_api_key,
        model=AvailableModels.llama_small,
        temperature=0,
    )
    prompt = await Prompts.intent()
    chain = prompt | llm | initial_intent_parser

    response = await chain.ainvoke({"query", "Make me a sandwich"})
    assert response == "general"

    response = await chain.ainvoke(
        {"query", "Which is the average score of all the students"}
    )
    assert response == "query"


#
#     chat = await chat.sql_eval(db.query_prompt(query))
#     query = extract_code_block(chat.last_response_content())
#     assert query.startswith("SELECT")
#
#
# @pytest.mark.asyncio
# @pytest.mark.skipif(settings.llm_api_key == "Awesome API", reason="API Key not set")
# async def test_data_query():
#     q = "Find the average score of all the sudents"
#
#     llm = Chat(
#         api_key=settings.llm_api_key,
#         temperature=0.5,
#     )
#     db = SessionDB(
#         storage_engine=TEST_STORAGE, path=TEST_OUTPUT_FOLDER, sid="test"
#     ).load_folder()
#
#     chat = await llm.sql_eval(db.query_prompt(q))
#     query = extract_code_block(chat.last_response_content())
#     result: pl.DataFrame = db.query(query).pl()
#
#     assert result.shape[0] == 1
#     assert result.select([pl.col("avg(Score)")]).to_series()[0] == 0.5
#
Refactored analytics class 2024-05-28 22:23:11 +02:00			`from pathlib import Path`

Staging 2024-05-25 09:16:45 +02:00			`import hellocomputer`
Removed sensitive api key 2024-07-13 11:53:37 +02:00			`import polars as pl`
Pluggable authentication. Still need to fix gcs 2024-07-13 22:46:34 +02:00			`import pytest`
Kind of refactored everything 2024-07-25 00:10:09 +02:00			`from hellocomputer.config import Settings, StorageEngines`
Pluggable authentication. Still need to fix gcs 2024-07-13 22:46:34 +02:00			`from hellocomputer.db.sessions import SessionDB`
Split models and prompts 2024-07-25 23:42:19 +02:00			`from hellocomputer.models import AvailableModels`
			`from hellocomputer.prompts import Prompts`
Slowly building the application with runnables. 2024-07-25 23:27:03 +02:00			`from hellocomputer.extraction import initial_intent_parser`
Kind of refactored everything 2024-07-25 00:10:09 +02:00			`from langchain_core.prompts import ChatPromptTemplate`
			`from langchain_community.agent_toolkits import SQLDatabaseToolkit`
			`from langchain_openai import ChatOpenAI`

			`settings = Settings(`
			`storage_engine=StorageEngines.local,`
			`path=Path(hellocomputer.__file__).parents[2] / "test" / "output",`
			`)`
Staging 2024-05-25 09:16:45 +02:00
Refactored analytics class 2024-05-28 22:23:11 +02:00			`TEST_XLS_PATH = (`
			`Path(hellocomputer.__file__).parents[2]`
			`/ "test"`
			`/ "data"`
			`/ "TestExcelHelloComputer.xlsx"`
			`)`
Kind of refactored everything 2024-07-25 00:10:09 +02:00
Better tests and docs 2024-06-16 08:56:45 +02:00			`SID = "test"`
Test model if api key is available 2024-05-23 23:31:00 +02:00

			`@pytest.mark.asyncio`
Ported to fireworks 2024-07-13 10:52:45 +02:00			`@pytest.mark.skipif(settings.llm_api_key == "Awesome API", reason="API Key not set")`
Test model if api key is available 2024-05-23 23:31:00 +02:00			`async def test_chat_simple():`
Kind of refactored everything 2024-07-25 00:10:09 +02:00			`llm = ChatOpenAI(`
			`base_url=settings.llm_base_url,`
Ported to fireworks 2024-07-13 10:52:45 +02:00			`api_key=settings.llm_api_key,`
Kind of refactored everything 2024-07-25 00:10:09 +02:00			`model=AvailableModels.mixtral_8x7b,`
Ported to fireworks 2024-07-13 10:52:45 +02:00			`temperature=0.5,`
			`)`
Kind of refactored everything 2024-07-25 00:10:09 +02:00			`prompt = ChatPromptTemplate.from_template(`
			`"""Say literally {word}, a single word. Don't be verbose,`
			`I'll be disappointed if you say more than a single word"""`
			`)`
			`chain = prompt \| llm`
			`response = await chain.ainvoke({"word": "Hello"})`
Staging 2024-05-25 09:16:45 +02:00
Slowly building the application with runnables. 2024-07-25 23:27:03 +02:00			`assert "hello" in response.content.lower()`
Better tests and docs 2024-06-16 08:56:45 +02:00

			`@pytest.mark.asyncio`
Ported to fireworks 2024-07-13 10:52:45 +02:00			`@pytest.mark.skipif(settings.llm_api_key == "Awesome API", reason="API Key not set")`
Kind of refactored everything 2024-07-25 00:10:09 +02:00			`async def test_query_context():`
			`db = SessionDB(settings, sid=SID).load_xls(TEST_XLS_PATH).llmsql`
Better tests and docs 2024-06-16 08:56:45 +02:00
Kind of refactored everything 2024-07-25 00:10:09 +02:00			`llm = ChatOpenAI(`
			`base_url=settings.llm_base_url,`
Ported to fireworks 2024-07-13 10:52:45 +02:00			`api_key=settings.llm_api_key,`
Kind of refactored everything 2024-07-25 00:10:09 +02:00			`model=AvailableModels.mixtral_8x7b,`
Ported to fireworks 2024-07-13 10:52:45 +02:00			`temperature=0.5,`
			`)`
Better tests and docs 2024-06-16 08:56:45 +02:00
Kind of refactored everything 2024-07-25 00:10:09 +02:00			`toolkit = SQLDatabaseToolkit(db=db, llm=llm)`
			`context = toolkit.get_context()`
			`assert "table_info" in context`
			`assert "table_names" in context`

Better tests and docs 2024-06-16 08:56:45 +02:00
Slowly building the application with runnables. 2024-07-25 23:27:03 +02:00			`@pytest.mark.asyncio`
			`@pytest.mark.skipif(settings.llm_api_key == "Awesome API", reason="API Key not set")`
			`async def test_initial_intent():`
			`llm = ChatOpenAI(`
			`base_url=settings.llm_base_url,`
			`api_key=settings.llm_api_key,`
Split models and prompts 2024-07-25 23:42:19 +02:00			`model=AvailableModels.llama_small,`
			`temperature=0,`
Slowly building the application with runnables. 2024-07-25 23:27:03 +02:00			`)`
			`prompt = await Prompts.intent()`
			`chain = prompt \| llm \| initial_intent_parser`

			`response = await chain.ainvoke({"query", "Make me a sandwich"})`
			`assert response == "general"`

			`response = await chain.ainvoke(`
			`{"query", "Which is the average score of all the students"}`
			`)`
			`assert response == "query"`


Kind of refactored everything 2024-07-25 00:10:09 +02:00			`#`
			`# chat = await chat.sql_eval(db.query_prompt(query))`
			`# query = extract_code_block(chat.last_response_content())`
			`# assert query.startswith("SELECT")`
			`#`
			`#`
			`# @pytest.mark.asyncio`
			`# @pytest.mark.skipif(settings.llm_api_key == "Awesome API", reason="API Key not set")`
			`# async def test_data_query():`
			`# q = "Find the average score of all the sudents"`
			`#`
			`# llm = Chat(`
			`# api_key=settings.llm_api_key,`
			`# temperature=0.5,`
			`# )`
			`# db = SessionDB(`
			`# storage_engine=TEST_STORAGE, path=TEST_OUTPUT_FOLDER, sid="test"`
			`# ).load_folder()`
			`#`
			`# chat = await llm.sql_eval(db.query_prompt(q))`
			`# query = extract_code_block(chat.last_response_content())`
			`# result: pl.DataFrame = db.query(query).pl()`
			`#`
			`# assert result.shape[0] == 1`
			`# assert result.select([pl.col("avg(Score)")]).to_series()[0] == 0.5`
			`#`