From 19388f19918d96d37fd1e875e47234520cd60e61 Mon Sep 17 00:00:00 2001 From: JohnGilhuly Date: Thu, 26 Sep 2024 16:20:09 -0700 Subject: [PATCH 1/2] Add Phoenix examples --- README.md | 2 + index.toml | 12 + .../arize_phoenix_evaluate_haystack_rag.ipynb | 828 ++++++++++++++++++ .../arize_phoenix_tracing_haystack.ipynb | 193 ++++ 4 files changed, 1035 insertions(+) create mode 100644 notebooks/arize_phoenix_evaluate_haystack_rag.ipynb create mode 100644 notebooks/arize_phoenix_tracing_haystack.ipynb diff --git a/README.md b/README.md index a0f932d..1c89604 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,8 @@ For more examples, you may also find our [Blog](https://haystack.deepset.ai/blog | Cohere for Multilingual QA (Haystack 1.x)| Open In Colab| | GPT-4 and Weaviate for Custom Documentation QA (Haystack 1.x)| Open In Colab| | Whisper Transcriber and Weaviate for YouTube video QA (Haystack 1.x)| Open In Colab| +| Trace a Haystack Pipeline with Arize Phoenix | Open In Colab| +| Evaluate RAG with Arize Phoenix | Open In Colab| ## How to Contribute a Cookbook diff --git a/index.toml b/index.toml index 2de2328..4cf19b9 100644 --- a/index.toml +++ b/index.toml @@ -255,3 +255,15 @@ title = "Agentic RAG with Llama 3.2 3B" notebook = "llama32_agentic_rag.ipynb" topics = ["RAG", "Agents", "Web-QA"] new = true + +[[cookbook]] +title = "Trace a Haystack Pipeline with Arize Phoenix" +notebook = "arize_phoenix_tracing_haystack.ipynb" +topics = ["Observability"] +new = true + +[[cookbook]] +title = "Evaluate RAG with Arize Phoenix" +notebook = "arize_phoenix_evaluate_haystack_rag.ipynb" +topics = ["Observability", "Evaluation", "RAG"] +new = true diff --git a/notebooks/arize_phoenix_evaluate_haystack_rag.ipynb b/notebooks/arize_phoenix_evaluate_haystack_rag.ipynb new file mode 100644 index 0000000..ce0f6db --- /dev/null +++ b/notebooks/arize_phoenix_evaluate_haystack_rag.ipynb @@ -0,0 +1,828 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": 
"4t3LXM0aNbl2" + }, + "source": [ + "
\n", + "

\n", + " \"phoenix\n", + "
\n", + " Docs\n", + " |\n", + " GitHub\n", + " |\n", + " Community\n", + "

\n", + "
\n", + "

Tracing and Evaluating a Haystack Application with Phoenix

\n", + "\n", + "Phoenix is a tool for tracing and evaluating LLM applications. In this tutorial, we will trace and evaluate a Haystack RAG pipeline. We'll evaluate using three different types of evaluations:\n", + "\n", + "1. Relevance: Whether the retrieved documents are relevant to the question.\n", + "2. Q&A Correctness: Whether the answer to the question is correct.\n", + "3. Hallucination: Whether the answer contains hallucinations.\n", + "\n", + "ℹ️ This notebook requires an OpenAI API key.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fGcvMui6fZIA", + "outputId": "95cf1638-aee8-4844-defe-fa501fbe76d2" + }, + "outputs": [], + "source": [ + "!pip install -q openinference-instrumentation-haystack haystack-ai \"arize-phoenix>=4.29.0\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mqitn1QzOU5v" + }, + "source": [ + "# Set API Keys" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "rDg3U7kbOUX_", + "outputId": "551483a6-52d8-4a9b-b1ff-8ab63864c710" + }, + "outputs": [], + "source": [ + "from getpass import getpass\n", + "import os\n", + "\n", + "if not (openai_api_key := os.getenv(\"OPENAI_API_KEY\")):\n", + " openai_api_key = getpass(\"🔑 Enter your OpenAI API key: \")\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qlWmN0pvpJCG" + }, + "source": [ + "# Launch Phoenix and Enable Haystack Tracing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3OhQrnlsfgdN" + }, + "outputs": [], + "source": [ + "# Check if PHOENIX_API_KEY is present in the environment variables.\n", + "# If it is, we'll use the cloud instance of Phoenix. 
If it's not, we'll start a local instance.\n", + "# A third option is to connect to a docker or locally hosted instance.\n", + "# See https://docs.arize.com/phoenix/setup/environments for more information.\n", + "\n", + "import os\n", + "\n", + "if \"PHOENIX_API_KEY\" in os.environ:\n", + " os.environ[\"OTEL_EXPORTER_OTLP_HEADERS\"] = f\"api_key={os.environ['PHOENIX_API_KEY']}\"\n", + " os.environ[\"PHOENIX_CLIENT_HEADERS\"] = f\"api_key={os.environ['PHOENIX_API_KEY']}\"\n", + " os.environ[\"PHOENIX_COLLECTOR_ENDPOINT\"] = \"https://app.phoenix.arize.com\"\n", + "\n", + "else:\n", + " import phoenix as px\n", + "\n", + " px.launch_app()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from openinference.instrumentation.haystack import HaystackInstrumentor\n", + "from phoenix.otel import register\n", + "\n", + "tracer_provider = register()\n", + "\n", + "# Use Phoenix's autoinstrumentor to automatically track traces from Haystack\n", + "HaystackInstrumentor().instrument(tracer_provider=tracer_provider, skip_dep_check=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "twBLgY1LpMPW" + }, + "source": [ + "# Set up your Haystack app" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fQYTT4P-fjQa", + "outputId": "41320394-b14e-4dac-e48c-3508c90516eb" + }, + "outputs": [], + "source": [ + "from haystack import Document, Pipeline\n", + "from haystack.components.builders.prompt_builder import PromptBuilder\n", + "from haystack.components.generators import OpenAIGenerator\n", + "from haystack.components.retrievers.in_memory import InMemoryBM25Retriever\n", + "from haystack.document_stores.in_memory import InMemoryDocumentStore\n", + "\n", + "# Write documents to InMemoryDocumentStore\n", + "document_store = InMemoryDocumentStore()\n", + "document_store.write_documents(\n", + " [\n", + " 
Document(content=\"My name is Jean and I live in Paris.\"),\n", + " Document(content=\"My name is Mark and I live in Berlin.\"),\n", + " Document(content=\"My name is Giorgio and I live in Rome.\"),\n", + " ]\n", + ")\n", + "\n", + "# Build a RAG pipeline\n", + "prompt_template = \"\"\"\n", + "Given these documents, answer the question.\n", + "Documents:\n", + "{% for doc in documents %}\n", + " {{ doc.content }}\n", + "{% endfor %}\n", + "Question: {{question}}\n", + "Answer:\n", + "\"\"\"\n", + "\n", + "retriever = InMemoryBM25Retriever(document_store=document_store)\n", + "prompt_builder = PromptBuilder(template=prompt_template)\n", + "llm = OpenAIGenerator(model=\"gpt-3.5-turbo\")\n", + "\n", + "rag_pipeline = Pipeline()\n", + "rag_pipeline.add_component(\"retriever\", retriever)\n", + "rag_pipeline.add_component(\"prompt_builder\", prompt_builder)\n", + "rag_pipeline.add_component(\"llm\", llm)\n", + "rag_pipeline.connect(\"retriever\", \"prompt_builder.documents\")\n", + "rag_pipeline.connect(\"prompt_builder\", \"llm\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ji06yJ2Bfmx9", + "outputId": "aa417de4-8ce9-41b3-c322-e5ed216dadcc" + }, + "outputs": [], + "source": [ + "# Ask a question\n", + "question = \"Who lives in Paris?\"\n", + "results = rag_pipeline.run(\n", + " {\n", + " \"retriever\": {\"query\": question},\n", + " \"prompt_builder\": {\"question\": question},\n", + " }\n", + ")\n", + "\n", + "print(results[\"llm\"][\"replies\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KDsd4qJIXfZv" + }, + "source": [ + "# Evaluating Retrieved Docs\n", + "\n", + "Now that we've traced our pipeline, let's start by evaluating the retrieved documents.\n", + "\n", + "All evaluations in Phoenix use the same general process:\n", + "1. Query and download trace data from Phoenix\n", + "2. Add evaluation labels to the trace data. 
This can be done using the Phoenix library, using Haystack evaluators, or using your own evaluators.\n", + "3. Log the evaluation labels to Phoenix\n", + "4. View evaluations\n", + "\n", + "We'll use the `get_retrieved_documents` function to get the trace data for the retrieved documents." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_0XDNCW3YFCz" + }, + "outputs": [], + "source": [ + "import nest_asyncio\n", + "nest_asyncio.apply()\n", + "\n", + "import phoenix as px\n", + "client = px.Client()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 318 + }, + "id": "FT9NbFomYYoX", + "outputId": "c51c4ef2-c738-44e1-e5ed-9f5d1cd6d090" + }, + "outputs": [], + "source": [ + "from phoenix.session.evaluation import get_retrieved_documents\n", + "\n", + "retrieved_documents_df = get_retrieved_documents(px.Client())\n", + "retrieved_documents_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next we'll use Phoenix's `RelevanceEvaluator` to evaluate the relevance of the retrieved documents. This evaluator uses an LLM to determine if the retrieved documents contain the answer to the question." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "03c4fba631ee4c248a1df176bac0c4d1", + "56de8838c18b4b7bbac4b2b7698dc966", + "ef24004aeccf4a4aa17f415a9f3e8376", + "8761cad8dfb2438bbe2f6a210ea30c44", + "dacebfc1bbeb42418e0af201e66d659c", + "ac230f37777740519cf5fdb17d60873b", + "f4b5da887a1e4b68bb14bda987a80bc4", + "82e3baf2439044a0a5b9f215b1ef585f", + "ba1467f305b74f0eb970572ff2772426", + "471fc55eb46b4d0fb6f96048a0f130b6", + "6d48080a5a9f416fb3fe442e9bde518e" + ] + }, + "id": "RsAJdoFWYZzk", + "outputId": "6a6987fe-6a82-4c9c-da69-15261cffbdd2" + }, + "outputs": [], + "source": [ + "from phoenix.evals import OpenAIModel, RelevanceEvaluator, run_evals\n", + "\n", + "relevance_evaluator = RelevanceEvaluator(OpenAIModel(model=\"gpt-4o-mini\"))\n", + "\n", + "retrieved_documents_relevance_df = run_evals(\n", + " evaluators=[relevance_evaluator],\n", + " dataframe=retrieved_documents_df,\n", + " provide_explanation=True,\n", + " concurrency=20,\n", + ")[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 175 + }, + "id": "p0ANPW35Xhkx", + "outputId": "ffc2a959-c7e1-4fb2-d3aa-18220fb3aaa7" + }, + "outputs": [], + "source": [ + "retrieved_documents_relevance_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we'll log the evaluation labels to Phoenix." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TSwP858Wb7Qj", + "outputId": "1acd1b35-3e91-4809-fc59-da919c5350d8" + }, + "outputs": [], + "source": [ + "from phoenix.trace import DocumentEvaluations, SpanEvaluations\n", + "\n", + "px.Client().log_evaluations(\n", + " DocumentEvaluations(dataframe=retrieved_documents_relevance_df, eval_name=\"relevance\"),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you now click on your document retrieval span in Phoenix, you should see the evaluation labels.\n", + "\n", + "![Evaluations](img/tutorial42_evaluations_1.png)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O5JBstOntoJx" + }, + "source": [ + "# Evaluate Response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 168 + }, + "id": "WX0iCQJWtwQQ", + "outputId": "a5dfd75b-31b4-430e-f002-4e257abbe995" + }, + "outputs": [], + "source": [ + "from phoenix.session.evaluation import get_qa_with_reference\n", + "\n", + "qa_with_reference_df = get_qa_with_reference(px.Client())\n", + "qa_with_reference_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "dc7bd8404c13468e99fcc37e8377ac57", + "d6308c3845ba42bd80b0e1903f54f5ff", + "294d2ad16e3647c5b1497eccb5fcf155", + "28a7c2f8b2c14bcd8a409f5b5bca7235", + "2fcda3f03a554fceadf85637c34a725a", + "cf9dd33671024014b9536cca596d343e", + "5ed868930d1540b988589a439b1c1b9a", + "81fce336b5f24b148dd557af20f93bc6", + "100cb58ebb3d4ce4a886277a24dd2afc", + "81ad93aae4e94c27af8161f25ba63f98", + "d2809f1d40024b37a6fdd62dd6acb44d" + ] + }, + "id": "O1RozU7ptywT", + "outputId": "1872f2d8-3a1c-4976-8c1d-bff49d7b6994" + }, + "outputs": [], + "source": [ + "from phoenix.evals 
import (\n", + " HallucinationEvaluator,\n", + " OpenAIModel,\n", + " QAEvaluator,\n", + " run_evals,\n", + ")\n", + "\n", + "qa_evaluator = QAEvaluator(OpenAIModel(model=\"gpt-4-turbo-preview\"))\n", + "hallucination_evaluator = HallucinationEvaluator(OpenAIModel(model=\"gpt-4-turbo-preview\"))\n", + "\n", + "qa_correctness_eval_df, hallucination_eval_df = run_evals(\n", + " evaluators=[qa_evaluator, hallucination_evaluator],\n", + " dataframe=qa_with_reference_df,\n", + " provide_explanation=True,\n", + " concurrency=20,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Gjgqr_Gxt0Ke", + "outputId": "3f8dc15a-a069-46d9-e717-61b8f06b8835" + }, + "outputs": [], + "source": [ + "px.Client().log_evaluations(\n", + " SpanEvaluations(dataframe=qa_correctness_eval_df, eval_name=\"Q&A Correctness\"),\n", + " SpanEvaluations(dataframe=hallucination_eval_df, eval_name=\"Hallucination\"),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You should now see the Q&A correctness and hallucination evaluations in Phoenix.\n", + "\n", + "![Evaluations](img/tutorial42_evaluations_2.png)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "03c4fba631ee4c248a1df176bac0c4d1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_56de8838c18b4b7bbac4b2b7698dc966", + "IPY_MODEL_ef24004aeccf4a4aa17f415a9f3e8376", + "IPY_MODEL_8761cad8dfb2438bbe2f6a210ea30c44" + ], + "layout": "IPY_MODEL_dacebfc1bbeb42418e0af201e66d659c" + } + }, + "471fc55eb46b4d0fb6f96048a0f130b6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "56de8838c18b4b7bbac4b2b7698dc966": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ac230f37777740519cf5fdb17d60873b", + "placeholder": "​", + "style": "IPY_MODEL_f4b5da887a1e4b68bb14bda987a80bc4", + "value": "run_evals " + } + }, + "6d48080a5a9f416fb3fe442e9bde518e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "82e3baf2439044a0a5b9f215b1ef585f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"8761cad8dfb2438bbe2f6a210ea30c44": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_471fc55eb46b4d0fb6f96048a0f130b6", + "placeholder": "​", + "style": "IPY_MODEL_6d48080a5a9f416fb3fe442e9bde518e", + "value": " 3/3 (100.0%) | ⏳ 00:01<00:00 |  2.49it/s" + } + }, + "ac230f37777740519cf5fdb17d60873b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ba1467f305b74f0eb970572ff2772426": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "dacebfc1bbeb42418e0af201e66d659c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ef24004aeccf4a4aa17f415a9f3e8376": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_82e3baf2439044a0a5b9f215b1ef585f", + "max": 3, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ba1467f305b74f0eb970572ff2772426", + "value": 3 + } + }, + "f4b5da887a1e4b68bb14bda987a80bc4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/arize_phoenix_tracing_haystack.ipynb b/notebooks/arize_phoenix_tracing_haystack.ipynb new file mode 100644 index 0000000..dc112e3 --- /dev/null +++ b/notebooks/arize_phoenix_tracing_haystack.ipynb @@ -0,0 +1,193 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "

\n", + " \"phoenix\n", + "
\n", + " Docs\n", + " |\n", + " GitHub\n", + " |\n", + " Community\n", + "

\n", + "
\n", + "

Tracing a Haystack Application with Phoenix

\n", + " \n", + "Phoenix makes your Haystack applications *observable* by visualizing the underlying structure of each call to your Haystack Pipelines and surfacing problematic spans of execution based on latency, token count, or other evaluation metrics.\n", + "\n", + "ℹ️ This notebook requires an OpenAI API key.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Install Dependencies & set OpenAI API key" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install arize-phoenix openinference-instrumentation-haystack haystack-ai" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from getpass import getpass\n", + "import os\n", + "\n", + "if not (openai_api_key := os.getenv(\"OPENAI_API_KEY\")):\n", + " openai_api_key = getpass(\"🔑 Enter your OpenAI API key: \")\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Phoenix\n", + "The command below initializes a local version of Phoenix that will run in the notebook. Phoenix also provides self-hosted and cloud deployment options." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import phoenix as px\n", + "session = px.launch_app()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Connect Phoenix to Haystack and Instrument\n", + "The command below connects Phoenix to your Haystack application and instruments the Haystack library. Any calls to Haystack pipelines from this point forward will be traced and logged to the Phoenix UI." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from openinference.instrumentation.haystack import HaystackInstrumentor\n", + "from phoenix.otel import register\n", + "\n", + "tracer_provider = register()\n", + "HaystackInstrumentor().instrument(tracer_provider=tracer_provider)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Build a Haystack Pipeline\n", + "The command below builds a simple Haystack pipeline that retrieves documents from an in-memory document store and uses an LLM to answer a question." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from haystack import Document, Pipeline\n", + "from haystack.components.builders.prompt_builder import PromptBuilder\n", + "from haystack.components.generators import OpenAIGenerator\n", + "from haystack.components.retrievers.in_memory import InMemoryBM25Retriever\n", + "from haystack.document_stores.in_memory import InMemoryDocumentStore\n", + "\n", + "document_store = InMemoryDocumentStore()\n", + "document_store.write_documents(\n", + " [\n", + " Document(content=\"My name is Jean and I live in Paris.\"),\n", + " Document(content=\"My name is Mark and I live in Berlin.\"),\n", + " Document(content=\"My name is Giorgio and I live in Rome.\"),\n", + " ]\n", + ")\n", + "\n", + "prompt_template = \"\"\"\n", + "Given these documents, answer the question.\n", + "Documents:\n", + "{% for doc in documents %}\n", + " {{ doc.content }}\n", + "{% endfor %}\n", + "Question: {{question}}\n", + "Answer:\n", + "\"\"\"\n", + "\n", + "retriever = InMemoryBM25Retriever(document_store=document_store)\n", + "prompt_builder = PromptBuilder(template=prompt_template)\n", + "llm = OpenAIGenerator()\n", + "\n", + "rag_pipeline = Pipeline()\n", + "rag_pipeline.add_component(\"retriever\", retriever)\n", + "rag_pipeline.add_component(\"prompt_builder\", prompt_builder)\n", + 
"rag_pipeline.add_component(\"llm\", llm)\n", + "rag_pipeline.connect(\"retriever\", \"prompt_builder.documents\")\n", + "rag_pipeline.connect(\"prompt_builder\", \"llm\")\n", + "\n", + "question = \"Who lives in Paris?\"\n", + "results = rag_pipeline.run(\n", + " {\n", + " \"retriever\": {\"query\": question},\n", + " \"prompt_builder\": {\"question\": question},\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## View the Pipeline in Phoenix\n", + "You should now see traces in Phoenix!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Phoenix is currently running on {session.url}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "phoenix", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 0d4e1f6facc38426e999e80365e775abda95d9dd Mon Sep 17 00:00:00 2001 From: JohnGilhuly Date: Thu, 26 Sep 2024 16:29:32 -0700 Subject: [PATCH 2/2] Update image urls --- notebooks/arize_phoenix_evaluate_haystack_rag.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/arize_phoenix_evaluate_haystack_rag.ipynb b/notebooks/arize_phoenix_evaluate_haystack_rag.ipynb index ce0f6db..84420b6 100644 --- a/notebooks/arize_phoenix_evaluate_haystack_rag.ipynb +++ b/notebooks/arize_phoenix_evaluate_haystack_rag.ipynb @@ -353,7 +353,7 @@ "source": [ "If you now click on your document retrieval span in Phoenix, you should see the evaluation labels.\n", "\n", - "![Evaluations](img/tutorial42_evaluations_1.png)\n" + 
"![Evaluations](https://github.com/Jgilhuly/phoenix-assets/blob/main/images/screenshots/haystack_evals_1.png?raw=true)\n" ] }, { @@ -452,7 +452,7 @@ "source": [ "You should now see the Q&A correctness and hallucination evaluations in Phoenix.\n", "\n", - "![Evaluations](img/tutorial42_evaluations_2.png)" + "![Evaluations](https://github.com/Jgilhuly/phoenix-assets/blob/main/images/screenshots/haystack_evals_2.png?raw=true)" ] } ],