From a021e1fd1ddebeeefab7772527f0d9d998ddf593 Mon Sep 17 00:00:00 2001 From: Tuana Celik Date: Fri, 18 Oct 2024 13:52:28 +0200 Subject: [PATCH] newsletter cookbook --- index.toml | 9 +- notebooks/newsletter-agent.ipynb | 855 +++++++++++++++++++++++++++++++ 2 files changed, 863 insertions(+), 1 deletion(-) create mode 100644 notebooks/newsletter-agent.ipynb diff --git a/index.toml b/index.toml index 0cebd52..97340f9 100644 --- a/index.toml +++ b/index.toml @@ -260,4 +260,11 @@ new = true title = "Advanced RAG: Automated Structured Metadata Enrichment" notebook = "metadata_enrichment.ipynb" new = true -topics = ["Advanced Retrieval", "RAG", "Metadata"] \ No newline at end of file +topics = ["Advanced Retrieval", "RAG", "Metadata"] + +[[cookbook]] +title = "Newsletter Sending Agent with Experimental Haystack Tools" +notebook = "newsletter-agent.ipynb" +new = true +experimental = true +topics = ["Function Calling", "Chat", "Agents"] \ No newline at end of file diff --git a/notebooks/newsletter-agent.ipynb b/notebooks/newsletter-agent.ipynb new file mode 100644 index 0000000..4ef889f --- /dev/null +++ b/notebooks/newsletter-agent.ipynb @@ -0,0 +1,855 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# πŸ—žοΈ Newsletter Agent with Haystack Tools\n", + "\n", + "πŸ§‘β€πŸ³ **Demo by Stefano Fiorucci ([X](https://x.com/theanakin87), [LinkedIn](https://www.linkedin.com/in/stefano-fiorucci/)) and Tuana Celik([X](https://x.com/tuanacelik), [LinkedIn](https://www.linkedin.com/in/tuanacelik/))**" + ], + "metadata": { + "id": "rkwAPsgBdnFf" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Install dependencies" + ], + "metadata": { + "id": "UVEseyMhnfu_" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LBNbJhcf6NCB" + }, + "outputs": [], + "source": [ + "! pip install haystack-ai trafilatura" + ] + }, + { + "cell_type": "markdown", + "source": [ + "#### **Experimental Features**\n", + "\n", + "In this demo, we are using Haystack features that are `from haystack_experimental`.\n", + "\n", + "- See all the experiemtal features in the [`haystack-experimental` repository](https://github.com/deepset-ai/haystack-experimental)\n", + "- Discuss each feature in the [discussion board](https://github.com/deepset-ai/haystack-experimental/discussions/98)\n", + "\n", + "The experimantal components and extensions we are using here are:\n", + "- The `Tool` and the extended `ChatMessage` datatypes.\n", + "- The `ToolInvoker` component." + ], + "metadata": { + "id": "q7eXbW9S0lrt" + } + }, + { + "cell_type": "code", + "source": [ + "from typing import List\n", + "from trafilatura import fetch_url, extract\n", + "import requests\n", + "from getpass import getpass\n", + "import os\n", + "\n", + "from haystack_experimental.components.generators.chat import OpenAIChatGenerator\n", + "from haystack_experimental.dataclasses import Tool, ChatMessage\n", + "from haystack_experimental.components.tools import ToolInvoker\n", + "\n", + "from haystack import Pipeline\n", + "from haystack.components.builders import PromptBuilder\n", + "from haystack.components.generators import OpenAIGenerator" + ], + "metadata": { + "id": "k0qnuVES2p4t" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Hacker News Fetcher Tool\n", + "\n", + "In a previous article and recipe, we had shown how you can create a custom component for Haystack called the `HackerNewsFetcher`.\n", + "\n", + "Here, we are doing something very similar, but instead we are creating a function and using that as a `Tool` instead.\n", + "\n", + "πŸ“š [Hacker News Summaries with Custom Components](https://haystack.deepset.ai/cookbook/hackernews-custom-component-rag?utm_campaign=developer-relations&utm_source=tools-livestream)\n", + "\n", + "This tool expects `top_k` as input, and returns that many of the _current_ top stories on Hacker News πŸš€" + ], + "metadata": { + "id": "uHElH_u323kd" + } + }, + { + "cell_type": "code", + "source": [ + "def hacker_news_fetcher(top_k: int = 3):\n", + " newest_list = requests.get(url='https://hacker-news.firebaseio.com/v0/topstories.json?print=pretty')\n", + " urls = []\n", + " articles = []\n", + " for id_ in newest_list.json()[0:top_k]:\n", + " article = requests.get(url=f\"https://hacker-news.firebaseio.com/v0/item/{id_}.json?print=pretty\")\n", + " if 'url' in article.json():\n", + " urls.append(article.json()['url'])\n", + " elif 'text' in article.json():\n", + " articles.append(article.json()['text'])\n", + "\n", + " for url in urls:\n", + " try:\n", + " downloaded = fetch_url(url)\n", + " text = extract(downloaded)\n", + " if text is not None:\n", + " articles.append(text[:500])\n", + " except Exception as e:\n", + " print(e)\n", + " print(f\"Couldn't download {url}, skipped\")\n", + "\n", + " return articles" + ], + "metadata": { + "id": "8SrNAWvA27D3" + }, + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "hacker_news_fetcher_tool = Tool(name=\"hacker_news_fetcher\",\n", + " description=\"Fetch the top k articles from hacker news\",\n", + " function=hacker_news_fetcher,\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"top_k\": {\n", + " \"type\": \"integer\",\n", + " \"description\": \"The number of articles to fetch\"\n", + " }\n", + " },\n", + " })" + ], + "metadata": { + "id": "_mmDM7mE3GtG" + }, + "execution_count": 28, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Newsletter generation Pipeline and Tool\n", + "\n", + "For the Newsletter gnereation tool, we will be creating a Haystack pipeline, and making our pipeline itself a tool.\n", + "\n", + "Our tool will expect the following inputs:\n", + "\n", + "- `articles`: Content to base the newsletter off of\n", + "- `target_people`: The audience we want to target, for example \"engineers\" may be our target audience\n", + "- `n_words`: The number of words we want to limit our newsletter to" + ], + "metadata": { + "id": "-qcpJPsb3MFe" + } + }, + { + "cell_type": "code", + "source": [ + "if not \"OPENAI_API_KEY\" in os.environ:\n", + " os.environ[\"OPENAI_API_KEY\"] = getpass(\"Enter your OpenAI API key: \")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "SIHSxnol3a3o", + "outputId": "e2789bf0-7b09-4ba9-e459-5688b9775739" + }, + "execution_count": 5, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Enter your OpenAI API key: Β·Β·Β·Β·Β·Β·Β·Β·Β·Β·\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "template = \"\"\"\n", + "Create a entertaining newsletter for {{target_people}} based on the following articles.\n", + "The newsletter should be well structured, with a unique angle and a maximum of {{n_words}} words.\n", + "\n", + "Articles:\n", + "{% for article in articles %}\n", + " {{ article }}\n", + " ---\n", + "{% endfor %}\n", + "\"\"\"\n", + "\n", + "newsletter_pipe = Pipeline()\n", + "newsletter_pipe.add_component(\"prompt_builder\", PromptBuilder(template=template))\n", + "newsletter_pipe.add_component(\"llm\", OpenAIGenerator(model=\"gpt-4o-mini\"))\n", + "newsletter_pipe.connect(\"prompt_builder\", \"llm\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NhfQOF4v3KJu", + "outputId": "7804c046-c3b4-4872-b26b-fad0eb187628" + }, + "execution_count": 29, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "\n", + "πŸš… Components\n", + " - prompt_builder: PromptBuilder\n", + " - llm: OpenAIGenerator\n", + "πŸ›€οΈ Connections\n", + " - prompt_builder.prompt -> llm.prompt (str)" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ] + }, + { + "cell_type": "code", + "source": [ + "def newsletter_pipeline_func(articles: List[str], target_people: str = \"programmers\", n_words: int = 100):\n", + " result = newsletter_pipe.run({\"prompt_builder\": {\"articles\": articles, \"target_people\": target_people, \"n_words\": n_words}})\n", + "\n", + " return {\"reply\": result[\"llm\"][\"replies\"][0]}\n", + "\n", + "newsletter_tool = Tool(name=\"newsletter_generator\",\n", + " description=\"Generate a newsletter based on some articles\",\n", + " function=newsletter_pipeline_func,\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"articles\": {\n", + " \"type\": \"array\",\n", + " \"items\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The articles to base the newsletter on\",\n", + " }\n", + " },\n", + " \"target_people\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The target audience for the newsletter\",\n", + " },\n", + " \"n_words\": {\n", + " \"type\": \"integer\",\n", + " \"description\": \"The number of words to summarize the newsletter to\",\n", + " }\n", + " },\n", + " \"required\": [\"articles\"],\n", + " })" + ], + "metadata": { + "id": "8YsxYyP83SL9" + }, + "execution_count": 30, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Send Email Tool\n", + "\n", + "Here, we are creating a Gmail tool. You login with your gmail account, allowing the final Agent to send emails from your email, to others.\n", + "\n", + "> ⚠️ Note: To be able to use the gmail too, you have to create an app password for your Gmail account, which will be the sender. You can delete this after.\n", + "\n", + "To configure our `email` Tool, you have to provide the following information about the sender email account πŸ‘‡" + ], + "metadata": { + "id": "qNJLc2te1BOi" + } + }, + { + "cell_type": "code", + "source": [ + "if not \"NAME\" in os.environ:\n", + " os.environ[\"NAME\"] = input(\"What's your name? \")\n", + "if not \"SENDER_EMAIL\" in os.environ:\n", + " os.environ[\"SENDER_EMAIL\"] = getpass(\"Enter your Gmail e-mail: \")\n", + "if not \"GMAIL_APP_PASSWORD\" in os.environ:\n", + " os.environ[\"GMAIL_APP_PASSWORD\"] = getpass(\"Enter your Gmail App Password: \")" + ], + "metadata": { + "id": "6btTAlL31DmE" + }, + "execution_count": 31, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Next, we create a `Tool` that expects the following input:\n", + "- `receiver`: The email address that we want to send an email to\n", + "- `body`: The body of the email\n", + "- `subject`: The subject line for the email." + ], + "metadata": { + "id": "PYZs8Wjbq6Gy" + } + }, + { + "cell_type": "code", + "source": [ + "import smtplib, ssl\n", + "from email.mime.text import MIMEText\n", + "\n", + "def send_email(receiver: str, body: str, subject: str):\n", + " msg = MIMEText(body)\n", + " sender_email = os.environ['SENDER_EMAIL']\n", + " sender_name = os.environ['NAME']\n", + " sender = f\"{sender_name} <{sender_email}>\"\n", + " msg['Subject'] = subject\n", + " msg['From'] = sender\n", + " port = 465 # For SSL\n", + " smtp_server = \"smtp.gmail.com\"\n", + " password = os.environ[\"GMAIL_APP_PASSWORD\"]\n", + " context = ssl.create_default_context()\n", + " with smtplib.SMTP_SSL(smtp_server, port, context=context) as server:\n", + " server.login(sender_email, password)\n", + " server.sendmail(sender_email, receiver, msg.as_string())\n", + " return 'Email sent!'" + ], + "metadata": { + "id": "DIYBOe5l1AL-" + }, + "execution_count": 32, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "email_tool = Tool(name=\"email\",\n", + " description=\"Send emails with specific content\",\n", + " function=send_email,\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"receiver\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The email of the receiver\"\n", + " },\n", + " \"body\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The content of the email\"\n", + " },\n", + " \"subject\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The subject of the email\"\n", + " }\n", + " },\n", + " })" + ], + "metadata": { + "id": "MrB2O_Yq3Dw0" + }, + "execution_count": 33, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Newsletter Sending Chat Agent\n", + "\n", + "Now, we build a Newsletter creating chat agent which we can use to ask for newsletters, as well as sending them to given email addresses." + ], + "metadata": { + "id": "K4qpD_nO3wol" + } + }, + { + "cell_type": "code", + "source": [ + "chat_generator = OpenAIChatGenerator(tools=[hacker_news_fetcher_tool, newsletter_tool, email_tool])\n", + "\n", + "tool_invoker = ToolInvoker(tools=[hacker_news_fetcher_tool, newsletter_tool, email_tool])\n", + "\n", + "messages = [\n", + " ChatMessage.from_system(\n", + " \"\"\"Prepare a tool call if needed, otherwise use your knowledge to respond to the user.\n", + " If the invocation of a tool requires the result of another tool, prepare only one call at a time.\n", + "\n", + " Each time you receive the result of a tool call, ask yourself: \"Am I done with the task?\".\n", + " If not and you need to invoke another tool, prepare the next tool call.\n", + " If you are done, respond with just the final result.\"\"\"\n", + " )\n", + " ]\n", + "\n", + "while True:\n", + " user_input = input(\"\\n\\nwaiting for input (type 'exit' or 'quit' to stop)\\nπŸ§‘: \")\n", + " if user_input.lower() == \"exit\" or user_input.lower() == \"quit\":\n", + " break\n", + " messages.append(ChatMessage.from_user(user_input))\n", + "\n", + " while True:\n", + " print(\"βŒ› iterating...\")\n", + "\n", + " replies = chat_generator.run(messages=messages)[\"replies\"]\n", + " messages.extend(replies)\n", + "\n", + " # Check for tool calls and handle them\n", + " if not replies[0].tool_calls:\n", + " break\n", + " tool_calls = replies[0].tool_calls\n", + "\n", + " # Print tool calls for debugging\n", + " for tc in tool_calls:\n", + " print(\"\\n TOOL CALL:\")\n", + " print(f\"\\t{tc.id}\")\n", + " print(f\"\\t{tc.tool_name}\")\n", + " for k,v in tc.arguments.items():\n", + " v_truncated = str(v)[:50]\n", + " print(f\"\\t{k}: {v_truncated}{'' if len(v_truncated) == len(str(v)) else '...'}\")\n", + "\n", + " tool_messages = tool_invoker.run(messages=replies)[\"tool_messages\"]\n", + " messages.extend(tool_messages)\n", + "\n", + "\n", + " # Print the final AI response after all tool calls are resolved\n", + " print(f\"πŸ€–: {messages[-1].text}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Bg3K8r6f33fF", + "outputId": "817b509b-3f00-431c-9d96-d325d7e5bb49" + }, + "execution_count": 34, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "waiting for input (type 'exit' or 'quit' to stop)\n", + "πŸ§‘: What is the top HN article now?\n", + "βŒ› iterating...\n", + "\n", + " TOOL CALL:\n", + "\tcall_aOluHPdSMAGosryayVwNxOvL\n", + "\thacker_news_fetcher\n", + "\ttop_k: 1\n", + "βŒ› iterating...\n", + "\n", + " TOOL CALL:\n", + "\tcall_G6Z10LdGwJgdqxspgIWwvsnl\n", + "\thacker_news_fetcher\n", + "\ttop_k: 1\n", + "βŒ› iterating...\n", + "πŸ€–: It appears that I'm facing an issue retrieving data from Hacker News, as the response does not contain the actual article information. Unfortunately, I'm unable to provide the top article at this moment. You might want to check the Hacker News website directly for the latest articles.\n", + "\n", + "\n", + "waiting for input (type 'exit' or 'quit' to stop)\n", + "πŸ§‘: What's the top 2 HN articles?\n", + "βŒ› iterating...\n", + "\n", + " TOOL CALL:\n", + "\tcall_SWI39GuYw579wRDhQVs02WUu\n", + "\thacker_news_fetcher\n", + "\ttop_k: 2\n", + "βŒ› iterating...\n", + "πŸ€–: It seems I'm encountering difficulties retrieving the top articles from Hacker News properly. However, based on the partial information I received, one of the articles is:\n", + "\n", + "1. **Adobe's new image rotation tool** - \"Project Turnable\" lets users fully rotate 2D vectors. This tool was showcased at Adobe's annual MAX conference as part of their \"Sneaks\" segment, where engineers present innovative ideas that may or may not be fully developed.\n", + "\n", + "Unfortunately, the first article did not provide relevant content. For the most accurate and complete information on the top Hacker News articles, I recommend checking the Hacker News website directly.\n", + "\n", + "\n", + "waiting for input (type 'exit' or 'quit' to stop)\n", + "πŸ§‘: Create a newsletter targeted at engineers based on this article. No more than 100 words.\n", + "βŒ› iterating...\n", + "\n", + " TOOL CALL:\n", + "\tcall_duVE2eBKkCe3wpJOuq6NEJte\n", + "\tnewsletter_generator\n", + "\tarticles: [\"Adobe's new image rotation tool is one of the mo...\n", + "\ttarget_people: engineers\n", + "\tn_words: 100\n", + "βŒ› iterating...\n", + "πŸ€–: **Engineering Whirlwind** \n", + "*Issue #42: AI Innovations Turned Up to Eleven*\n", + "\n", + "Hello, Innovators!\n", + "\n", + "Dive into Adobe's latest gem, *Project Turntable*! This fascinating tool offers engineers a chance to fully rotate 2D vectors like never before. Unveiled at their MAX conference, it sits at the intersection of imagination and engineering mastery. As Adobe's engineers sneak out innovative ideas, warm up those creative enginesβ€”who knows what else might come spinning your way?\n", + "\n", + "Stay sharp and keep spinning those ideas! \n", + "β€” The Engineering Brigade 🌟\n", + "\n", + "\n", + "waiting for input (type 'exit' or 'quit' to stop)\n", + "πŸ§‘: Email this to tuana.celik@deepset.ai You can decide on the subjectline\n", + "βŒ› iterating...\n", + "\n", + " TOOL CALL:\n", + "\tcall_e7thnZ8Bq1kBjU4dyrGLP0jK\n", + "\temail\n", + "\treceiver: tuana.celik@deepset.ai\n", + "\tbody: **Engineering Whirlwind** \n", + "*Issue #42: AI Innovat...\n", + "\tsubject: Latest Innovations: Adobe's Project Turntable\n", + "βŒ› iterating...\n", + "πŸ€–: The newsletter has been successfully emailed to tuana.celik@deepset.ai with the subject \"Latest Innovations: Adobe's Project Turntable.\" If you need any further assistance, feel free to ask!\n", + "\n", + "\n", + "waiting for input (type 'exit' or 'quit' to stop)\n", + "πŸ§‘: exit\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Extras: Converting Tools" + ], + "metadata": { + "id": "-IbTLr8ueNRh" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Convert functions into Tools\n", + "\n", + "This feature is not yet released, so we need to install `haystack-experimental` from main." + ], + "metadata": { + "id": "65DYH0JvePeO" + } + }, + { + "cell_type": "code", + "source": [ + "! pip install git+https://github.com/deepset-ai/haystack-experimental@main#egg=haystack-experimental" + ], + "metadata": { + "id": "NTp-02mLeO2V" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from typing import Annotated\n", + "from pprint import pp" + ], + "metadata": { + "id": "RCQJJb1Oesaf" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Writing the JSON schema is not fun... πŸ€”" + ], + "metadata": { + "id": "xVX72INMhGCS" + } + }, + { + "cell_type": "code", + "source": [ + "def newsletter_pipeline_func(articles: List[str], target_people: str = \"programmers\", n_words: int = 100):\n", + " result = newsletter_pipe.run({\"prompt_builder\": {\"articles\": articles, \"target_people\": target_people, \"n_words\": n_words}})\n", + "\n", + " return {\"reply\": result[\"llm\"][\"replies\"][0]}\n", + "\n", + "newsletter_tool = Tool(name=\"newsletter_generator\",\n", + " description=\"Generate a newsletter based on some articles\",\n", + " function=newsletter_pipeline_func,\n", + " parameters={\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"articles\": {\n", + " \"type\": \"array\",\n", + " \"items\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The articles to include in the newsletter\",\n", + " }\n", + " },\n", + " \"target_people\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The target audience for the newsletter\",\n", + " },\n", + " \"n_words\": {\n", + " \"type\": \"integer\",\n", + " \"description\": \"The number of words to summarize the newsletter to\",\n", + " }\n", + " },\n", + " \"required\": [\"articles\"],\n", + " })" + ], + "metadata": { + "id": "01FSg09De5bA" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "We can do this instead πŸ‘‡" + ], + "metadata": { + "id": "9laNlRiThU45" + } + }, + { + "cell_type": "code", + "source": [ + "def newsletter_pipeline_func(\n", + " articles: Annotated[List[str], \"The articles to include in the newsletter\"],\n", + " target_people: Annotated[str, \"The target audience for the newsletter\"] = \"programmers\",\n", + " n_words: Annotated[int, \"The number of words to summarize the newsletter to\"] = 100\n", + " ):\n", + " \"\"\"Generate a newsletter based on some articles\"\"\"\n", + "\n", + " result = newsletter_pipe.run({\"prompt_builder\": {\"articles\": articles, \"target_people\": target_people, \"n_words\": n_words}})\n", + "\n", + " return {\"reply\": result[\"llm\"][\"replies\"][0]}\n", + "\n", + "newsletter_tool = Tool.from_function(newsletter_pipeline_func)\n", + "\n", + "pp(newsletter_tool, width=200)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Htmwg90cfZLO", + "outputId": "d4dc2485-8ddf-4309-c7d5-0e3823068108" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Tool(name='newsletter_pipeline_func',\n", + " description='Generate a newsletter based on some articles',\n", + " parameters={'properties': {'articles': {'items': {'type': 'string'}, 'type': 'array', 'description': 'The articles to include in the newsletter'},\n", + " 'target_people': {'default': 'programmers', 'type': 'string', 'description': 'The target audience for the newsletter'},\n", + " 'n_words': {'default': 100, 'type': 'integer', 'description': 'The number of words to summarize the newsletter to'}},\n", + " 'required': ['articles'],\n", + " 'type': 'object'},\n", + " function=)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Convert Pre-Existing Tools into Haystack Tools\n", + "\n", + "Haystack is quite flexible. This means if you have tools already defined elsewhere, you are able to convert them to Haystack tools. For example,\n", + " [LangChain has several interesting tools](https://python.langchain.com/docs/integrations/tools/) that we can seamlessly convert into Haystack tools.\n" + ], + "metadata": { + "id": "lnmBWMqQhm6f" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install langchain-community" + ], + "metadata": { + "id": "BCjAE8fyjhPg" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from pydantic import create_model\n", + "from haystack_experimental.dataclasses.tool import _remove_title_from_schema\n", + "\n", + "def convert_langchain_tool_to_haystack_tool(langchain_tool):\n", + " tool_name = langchain_tool.name\n", + " tool_description = langchain_tool.description\n", + "\n", + " def invocation_adapter(**kwargs):\n", + " return langchain_tool.invoke(input=kwargs)\n", + "\n", + " tool_function = invocation_adapter\n", + "\n", + " model_fields = langchain_tool.args_schema.model_fields\n", + "\n", + " fields = {name: (field.annotation, field.default) for name, field in model_fields.items()}\n", + " descriptions = {name: field.description for name, field in model_fields.items()}\n", + "\n", + " model = create_model(tool_name, **fields)\n", + " schema = model.model_json_schema()\n", + "\n", + " # we don't want to include title keywords in the schema, as they contain redundant information\n", + " # there is no programmatic way to prevent Pydantic from adding them, so we remove them later\n", + " # see https://github.com/pydantic/pydantic/discussions/8504\n", + " _remove_title_from_schema(schema)\n", + "\n", + " # add parameters descriptions to the schema\n", + " for name, description in descriptions.items():\n", + " if name in schema[\"properties\"]:\n", + " schema[\"properties\"][name][\"description\"] = description\n", + "\n", + " return Tool(name=tool_name, description=tool_description, parameters=schema, function=tool_function)" + ], + "metadata": { + "id": "0vU0n12zjz2T" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from langchain_community.agent_toolkits import FileManagementToolkit\n", + "toolkit = FileManagementToolkit(\n", + " root_dir=\"/\"\n", + ") # If you don't provide a root_dir, operations will default to the current working directory\n", + "toolkit.get_tools()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XbFgPSyXj6aD", + "outputId": "f78ac9f4-fbb7-438f-b461-2514d2a3715b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[CopyFileTool(root_dir='/'),\n", + " DeleteFileTool(root_dir='/'),\n", + " FileSearchTool(root_dir='/'),\n", + " MoveFileTool(root_dir='/'),\n", + " ReadFileTool(root_dir='/'),\n", + " WriteFileTool(root_dir='/'),\n", + " ListDirectoryTool(root_dir='/')]" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "langchain_listdir_tool = toolkit.get_tools()[-1]" + ], + "metadata": { + "id": "3fDROQavkoKo" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "haystack_listdir_tool = convert_langchain_tool_to_haystack_tool(langchain_listdir_tool)" + ], + "metadata": { + "id": "oBI83cqYkuBY" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from haystack_experimental.components import OpenAIChatGenerator, ToolInvoker\n", + "from haystack_experimental.dataclasses import ChatMessage\n", + "\n", + "chat_generator = OpenAIChatGenerator(model=\"gpt-4o-mini\", tools=[haystack_listdir_tool])\n", + "tool_invoker = ToolInvoker(tools=[haystack_listdir_tool])\n", + "\n", + "user_message = ChatMessage.from_user(\"List the files in /content/sample_data\")\n", + "\n", + "replies = chat_generator.run(messages=[user_message])[\"replies\"]\n", + "# print(f\"assistant messages: {replies}\")\n", + "\n", + "if replies[0].tool_calls:\n", + "\n", + " tool_messages = tool_invoker.run(messages=replies)[\"tool_messages\"]\n", + " # print(f\"tool messages: {tool_messages}\")\n", + "\n", + " # we pass all the messages to the Chat Generator\n", + " messages = [user_message] + replies + tool_messages\n", + " final_replies = chat_generator.run(messages=messages)[\"replies\"]\n", + " print(f\"{final_replies[0].text}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DzyphA4NkxXY", + "outputId": "1d18566c-1a58-4eed-ce99-ed152a894d30" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The files in the `/content/sample_data` directory are:\n", + "\n", + "1. anscombe.json\n", + "2. README.md\n", + "3. mnist_train_small.csv\n", + "4. california_housing_train.csv\n", + "5. california_housing_test.csv\n", + "6. mnist_test.csv\n" + ] + } + ] + } + ] +} \ No newline at end of file