From 082b160589be5316809630e8c96323a9aff286af Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Tue, 16 Jun 2026 15:06:54 +0200 Subject: [PATCH 1/2] Add individual tracing docs pages --- .../pipelines/debugging-pipelines.mdx | 2 +- docs-website/docs/development/tracing.mdx | 300 +----------------- .../development/tracing/custom-tracer.mdx | 82 +++++ .../docs/development/tracing/datadog.mdx | 94 ++++++ .../docs/development/tracing/langfuse.mdx | 110 +++++++ .../development/tracing/logging-tracer.mdx | 61 ++++ .../docs/development/tracing/mlflow.mdx | 51 +++ .../development/tracing/opentelemetry.mdx | 172 ++++++++++ .../docs/development/tracing/weave.mdx | 93 ++++++ docs-website/sidebars.js | 18 +- 10 files changed, 694 insertions(+), 289 deletions(-) create mode 100644 docs-website/docs/development/tracing/custom-tracer.mdx create mode 100644 docs-website/docs/development/tracing/datadog.mdx create mode 100644 docs-website/docs/development/tracing/langfuse.mdx create mode 100644 docs-website/docs/development/tracing/logging-tracer.mdx create mode 100644 docs-website/docs/development/tracing/mlflow.mdx create mode 100644 docs-website/docs/development/tracing/opentelemetry.mdx create mode 100644 docs-website/docs/development/tracing/weave.mdx diff --git a/docs-website/docs/concepts/pipelines/debugging-pipelines.mdx b/docs-website/docs/concepts/pipelines/debugging-pipelines.mdx index 4665d1c195..5dc8b28eb3 100644 --- a/docs-website/docs/concepts/pipelines/debugging-pipelines.mdx +++ b/docs-website/docs/concepts/pipelines/debugging-pipelines.mdx @@ -116,7 +116,7 @@ Here’s what the resulting log would look like when a pipeline is run: ## Tracing -To get a bigger picture of the pipeline’s performance, try tracing it with [Langfuse](../../development/tracing.mdx#langfuse). +To get a bigger picture of the pipeline’s performance, try tracing it with [Langfuse](../../development/tracing/langfuse.mdx). 
Our [Tracing](../../development/tracing.mdx) page has more about other tracing solutions for Haystack. diff --git a/docs-website/docs/development/tracing.mdx b/docs-website/docs/development/tracing.mdx index 13538dcea9..181c897e06 100644 --- a/docs-website/docs/development/tracing.mdx +++ b/docs-website/docs/development/tracing.mdx @@ -2,200 +2,26 @@ title: "Tracing" id: tracing slug: "/tracing" -description: "This page explains how to use tracing in Haystack. It describes how to set up a tracing backend with OpenTelemetry, Datadog, or your own solution. This can help you monitor your app's performance and optimize it." +description: "This page explains how to use tracing in Haystack. It lists the tracing backends Haystack supports out of the box and explains how to enable, configure, and disable tracing." --- -import ClickableImage from "@site/src/components/ClickableImage"; - # Tracing -This page explains how to use tracing in Haystack. It describes how to set up a tracing backend with OpenTelemetry, Datadog, or your own solution. This can help you monitor your app's performance and optimize it. - -Traces document the flow of requests through your application and are vital for monitoring applications in production. This helps to understand the execution order of your pipeline components and analyze where your pipeline spends the most time. - -## Configuring a Tracing Backend - -Instrumented applications typically send traces to a trace collector or a tracing backend. Haystack provides out-of-the-box support for [OpenTelemetry](https://opentelemetry.io/) and [Datadog](https://app.datadoghq.eu/dashboard/lists). You can also quickly implement support for additional providers of your choosing. - -### OpenTelemetry - -To use OpenTelemetry as your tracing backend, follow these steps: - -1. Install the [OpenTelemetry SDK](https://opentelemetry.io/docs/languages/python/): - - ```shell - pip install opentelemetry-sdk - pip install opentelemetry-exporter-otlp - ``` -2. 
To add traces to even deeper levels of your pipelines, we recommend you check out [OpenTelemetry integrations](https://opentelemetry.io/ecosystem/registry/?s=python), such as: - - [`urllib3` instrumentation](https://github.com/open-telemetry/opentelemetry-python-contrib/tree/main/instrumentation/opentelemetry-instrumentation-urllib3) for tracing HTTP requests in your pipeline, - - [OpenAI instrumentation](https://github.com/traceloop/openllmetry/tree/main/packages/opentelemetry-instrumentation-openai) for tracing OpenAI requests. -3. There are two options for how to hook Haystack to the OpenTelemetry SDK. - - - Run your Haystack applications using OpenTelemetry’s [automated instrumentation](https://opentelemetry.io/docs/languages/python/getting-started/#instrumentation). Haystack will automatically detect the configured tracing backend and use it to send traces. - - First, install the `OpenTelemetry` CLI: - - ```shell - pip install opentelemetry-distro - ``` - - Then, run your Haystack application using the OpenTelemetry SDK: - - ```shell - opentelemetry-instrument \ - --traces_exporter console \ - --metrics_exporter console \ - --logs_exporter console \ - --service_name my-haystack-app \ - - ``` - - — or — - - - Configure the tracing backend in your Python code: - - ```python - from haystack import tracing - - from opentelemetry import trace - from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter - from opentelemetry.sdk.trace import TracerProvider - from opentelemetry.sdk.trace.export import BatchSpanProcessor - from opentelemetry.sdk.resources import Resource - from opentelemetry.semconv.resource import ResourceAttributes - - # Service name is required for most backends - resource = Resource(attributes={ - ResourceAttributes.SERVICE_NAME: "haystack" # Correct constant - }) - - tracer_provider = TracerProvider(resource=resource) - processor = BatchSpanProcessor(OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces")) - 
tracer_provider.add_span_processor(processor) - trace.set_tracer_provider(tracer_provider) - - # Tell Haystack to auto-detect the configured tracer - import haystack.tracing - haystack.tracing.auto_enable_tracing() - - # Explicitly tell Haystack to use your tracer - from haystack.tracing import OpenTelemetryTracer - - tracer = tracer_provider.get_tracer("my_application") - tracing.enable_tracing(OpenTelemetryTracer(tracer)) - ``` - -### Datadog - -The `DatadogConnector` component lets you trace your Haystack pipelines with [Datadog](https://www.datadoghq.com/). - -Simply install the integration with `pip install datadog-haystack`, then add the connector to your pipeline. - -:::info -Check out the [integration page](https://haystack.deepset.ai/integrations/datadog) for more details and example usage. -::: - -### Langfuse - -`LangfuseConnector` component allows you to easily trace your Haystack pipelines with the Langfuse UI. - -Simply install the component with `pip install langfuse-haystack`, then add it to your pipeline. - -:::info -Check out the component's [documentation page](../pipeline-components/connectors/langfuseconnector.mdx) for more details and example usage, or our [blog post](https://haystack.deepset.ai/blog/langfuse-integration) for the complete walkthrough. -::: - - -### MLflow - -[MLflow](https://mlflow.org/) is an open-source platform for managing the end-to-end machine learning and AI lifecycle. MLflow provides native tracing support for Haystack. Simply install MLflow and enable automatic tracing with a single line of code. - -```shell -pip install mlflow -``` - -```python -import mlflow - -mlflow.haystack.autolog() -# Optionally set an experiment name -mlflow.set_experiment("Haystack") -``` - -This automatically captures traces from all Haystack pipelines and components, including latencies, token usage, cost, and any exceptions. 
- -:::info -Check out the [MLflow Haystack integration guide](https://haystack.deepset.ai/integrations/mlflow) for a full walkthrough with examples. -::: +Traces document the flow of requests through your application and are vital for monitoring applications in production. This helps you understand the execution order of your pipeline components and analyze where your pipeline spends the most time. -### Weights & Biases Weave +Instrumented applications typically send traces to a trace collector or a tracing backend. Haystack provides out-of-the-box support for several backends, and you can also quickly implement support for additional providers of your choosing. -The `WeaveConnector` component allows you to trace and visualize your pipeline execution in [Weights & Biases](https://wandb.ai/site/) framework. +## Supported Tracers -You will first need to create a free account on Weights & Biases website and get your API key, as well as install the integration with `pip install weights_biases-haystack`. - -:::info -Check out the component's [documentation page](../pipeline-components/connectors/weaveconnector.mdx) for more details and example usage. -::: - -### Custom Tracing Backend - -To use your custom tracing backend with Haystack, follow these steps: - -1. Implement the `Tracer` interface. 
The following code snippet provides an example using the OpenTelemetry package: - - ```python - import contextlib - from typing import Optional, Dict, Any, Iterator - - from opentelemetry import trace - from opentelemetry.trace import NonRecordingSpan - - from haystack.tracing import Tracer, Span - from haystack.tracing import utils as tracing_utils - import opentelemetry.trace - - class OpenTelemetrySpan(Span): - def __init__(self, span: opentelemetry.trace.Span) -> None: - self._span = span - - def set_tag(self, key: str, value: Any) -> None: - # Tracing backends usually don't support any tag value - # `coerce_tag_value` forces the value to either be a Python - # primitive (int, float, boolean, str) or tries to dump it as string. - coerced_value = tracing_utils.coerce_tag_value(value) - self._span.set_attribute(key, coerced_value) - - class OpenTelemetryTracer(Tracer): - def __init__(self, tracer: opentelemetry.trace.Tracer) -> None: - self._tracer = tracer - - @contextlib.contextmanager - def trace(self, operation_name: str, tags: Optional[Dict[str, Any]] = None) -> Iterator[Span]: - with self._tracer.start_as_current_span(operation_name) as span: - span = OpenTelemetrySpan(span) - if tags: - span.set_tags(tags) - - yield span - - def current_span(self) -> Optional[Span]: - current_span = trace.get_current_span() - if isinstance(current_span, NonRecordingSpan): - return None - - return OpenTelemetrySpan(current_span) - ``` - -2. Tell Haystack to use your custom tracer: - - ```python - from haystack import tracing - - haystack_tracer = OpenTelemetryTracer(tracer) - tracing.enable_tracing(haystack_tracer) - ``` +| Tracer | Description | +| --- | --- | +| [OpenTelemetry](tracing/opentelemetry.mdx) | Send traces to any [OpenTelemetry](https://opentelemetry.io/)-compatible backend. Includes a Jaeger setup for local development. | +| [MLflow](tracing/mlflow.mdx) | Capture traces with [MLflow](https://mlflow.org/)'s native Haystack tracing support. 
| +| [Datadog](tracing/datadog.mdx) | Trace your pipelines with [Datadog](https://www.datadoghq.com/) using the `DatadogTracer` or the `DatadogConnector` component. | +| [Langfuse](tracing/langfuse.mdx) | Trace your pipelines with the [Langfuse](https://langfuse.com/) UI using the `LangfuseTracer` or the `LangfuseConnector` component. | +| [Weights & Biases Weave](tracing/weave.mdx) | Trace and visualize pipeline execution in [Weights & Biases](https://wandb.ai/site/) using the `WeaveTracer` or the `WeaveConnector` component. | +| [LoggingTracer](tracing/logging-tracer.mdx) | Inspect the data flowing through your pipeline in real time through logs, with no backend setup. | +| [Custom Tracer](tracing/custom-tracer.mdx) | Connect any tracing backend by implementing the `Tracer` interface. | ## Disabling Auto Tracing @@ -237,103 +63,3 @@ To enable content tracing, there are two options: tracing.tracer.is_content_tracing_enabled = True ``` - -## Visualizing Traces During Development - -Use [Jaeger](https://www.jaegertracing.io/docs/1.6/getting-started/) as a lightweight tracing backend for local pipeline development. This allows you to experiment with tracing without the need for a complex tracing backend. - - -1. Run the Jaeger container. This creates a tracing backend as well as a UI to visualize the traces: - - ```shell - docker run --rm -d --name jaeger \ - -e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \ - -p 6831:6831/udp \ - -p 6832:6832/udp \ - -p 5778:5778 \ - -p 16686:16686 \ - -p 4317:4317 \ - -p 4318:4318 \ - -p 14250:14250 \ - -p 14268:14268 \ - -p 14269:14269 \ - -p 9411:9411 \ - jaegertracing/all-in-one:latest - ``` -2. Install the OpenTelemetry SDK: - - ```shell - pip install opentelemetry-sdk - pip install opentelemetry-exporter-otlp - ``` -3. 
Configure `OpenTelemetry` to use the Jaeger backend: - - ```python - from opentelemetry.sdk.resources import Resource - from opentelemetry.semconv.resource import ResourceAttributes - - from opentelemetry import trace - from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter - from opentelemetry.sdk.trace import TracerProvider - from opentelemetry.sdk.trace.export import BatchSpanProcessor - - # Service name is required for most backends - resource = Resource(attributes={ - ResourceAttributes.SERVICE_NAME: "haystack" - }) - - tracer_provider = TracerProvider(resource=resource) - processor = BatchSpanProcessor(OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces")) - tracer_provider.add_span_processor(processor) - trace.set_tracer_provider(tracer_provider) - ``` -4. Tell Haystack to use OpenTelemetry for tracing: - - ```python - import haystack.tracing - - haystack.tracing.auto_enable_tracing() - ``` -5. Run your pipeline: - - ```python - ... - pipeline.run(...) - ... - ``` -6. Inspect the traces in the UI provided by Jaeger at [http://localhost:16686](http://localhost:16686/search). - -## Real-Time Pipeline Logging - -Use Haystack's [`LoggingTracer`](https://github.com/deepset-ai/haystack/blob/main/haystack/tracing/logging_tracer.py) logs to inspect the data that's flowing through your pipeline in real-time. - -This feature is particularly helpful during experimentation and prototyping, as you don’t need to set up any tracing backend beforehand. - -Here’s how you can enable this tracer. 
In this example, we are adding color tags (this is optional) to highlight the components' names and inputs: - -```python -import logging -from haystack import tracing -from haystack.tracing.logging_tracer import LoggingTracer - -logging.basicConfig( - format="%(levelname)s - %(name)s - %(message)s", - level=logging.WARNING, -) -logging.getLogger("haystack").setLevel(logging.DEBUG) - -tracing.tracer.is_content_tracing_enabled = ( - True # to enable tracing/logging content (inputs/outputs) -) -tracing.enable_tracing( - LoggingTracer( - tags_color_strings={ - "haystack.component.input": "\x1b[1;31m", - "haystack.component.name": "\x1b[1;34m", - }, - ), -) -``` - -Here’s what the resulting log would look like when a pipeline is run: - diff --git a/docs-website/docs/development/tracing/custom-tracer.mdx b/docs-website/docs/development/tracing/custom-tracer.mdx new file mode 100644 index 0000000000..ae40f71320 --- /dev/null +++ b/docs-website/docs/development/tracing/custom-tracer.mdx @@ -0,0 +1,82 @@ +--- +title: "Custom Tracer" +id: tracing-custom-tracer +slug: "/tracing-custom-tracer" +description: "Learn how to connect Haystack to a custom tracing backend by implementing the Tracer interface." +--- + +# Custom Tracer + +Learn how to connect Haystack to a custom tracing backend by implementing the `Tracer` interface. + +
+ +| | | +| --- | --- | +| **Base classes** | `Tracer` and `Span` | +| **How to enable** | Implement the `Tracer` interface, then `tracing.enable_tracing(your_tracer)` | +| **Content tracing** | Optional. Set `HAYSTACK_CONTENT_TRACING_ENABLED` to `true` to trace component inputs and outputs | +| **Package** | Built into Haystack | +| **GitHub link** | https://github.com/deepset-ai/haystack/blob/main/haystack/tracing/tracer.py | + +
+ +## Overview + +If your tracing backend isn't supported out of the box, you can connect it to Haystack by implementing the `Tracer` interface. This gives you full control over how spans are created and how tags are recorded. + +## Usage + +1. Implement the `Tracer` interface. The following code snippet provides an example using the OpenTelemetry package: + + ```python + import contextlib + from typing import Optional, Dict, Any, Iterator + + from opentelemetry import trace + from opentelemetry.trace import NonRecordingSpan + + from haystack.tracing import Tracer, Span + from haystack.tracing import utils as tracing_utils + import opentelemetry.trace + + class OpenTelemetrySpan(Span): + def __init__(self, span: opentelemetry.trace.Span) -> None: + self._span = span + + def set_tag(self, key: str, value: Any) -> None: + # Tracing backends usually don't support any tag value + # `coerce_tag_value` forces the value to either be a Python + # primitive (int, float, boolean, str) or tries to dump it as string. + coerced_value = tracing_utils.coerce_tag_value(value) + self._span.set_attribute(key, coerced_value) + + class OpenTelemetryTracer(Tracer): + def __init__(self, tracer: opentelemetry.trace.Tracer) -> None: + self._tracer = tracer + + @contextlib.contextmanager + def trace(self, operation_name: str, tags: Optional[Dict[str, Any]] = None) -> Iterator[Span]: + with self._tracer.start_as_current_span(operation_name) as span: + span = OpenTelemetrySpan(span) + if tags: + span.set_tags(tags) + + yield span + + def current_span(self) -> Optional[Span]: + current_span = trace.get_current_span() + if isinstance(current_span, NonRecordingSpan): + return None + + return OpenTelemetrySpan(current_span) + ``` + +2. 
Tell Haystack to use your custom tracer. Here, `tracer` is an OpenTelemetry tracer, for example one returned by `trace.get_tracer("my_application")`: + + ```python + from haystack import tracing + + haystack_tracer = OpenTelemetryTracer(tracer) + tracing.enable_tracing(haystack_tracer) + ``` diff --git a/docs-website/docs/development/tracing/datadog.mdx b/docs-website/docs/development/tracing/datadog.mdx new file mode 100644 index 0000000000..6c433d7f48 --- /dev/null +++ b/docs-website/docs/development/tracing/datadog.mdx @@ -0,0 +1,94 @@ +--- +title: "Datadog" +id: tracing-datadog +slug: "/tracing-datadog" +description: "Learn how to trace your Haystack pipelines with Datadog." +--- + +# Datadog + +Learn how to trace your Haystack pipelines with Datadog. + +
+ +| | | +| --- | --- | +| **Tracer class** | `DatadogTracer` | +| **How to enable** | Enable the tracer with `tracing.enable_tracing(DatadogTracer(ddtrace.tracer))`, or add the `DatadogConnector` component to your pipeline | +| **Content tracing** | Set `HAYSTACK_CONTENT_TRACING_ENABLED` to `true` to trace component inputs and outputs | +| **Package** | `datadog-haystack` | +| **API reference** | [datadog](/reference/integrations-datadog) | +| **GitHub link** | https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/datadog | + +
+ +## Overview + +Trace your Haystack pipelines with [Datadog](https://www.datadoghq.com/) through [Datadog's tracing library `ddtrace`](https://ddtrace.readthedocs.io/en/stable/). Haystack captures detailed information about pipeline runs, like API calls, context data, and prompts, so you can see the complete trace of your pipeline execution in Datadog. + +## Installation + +Install the `datadog-haystack` package: + +```shell +pip install datadog-haystack +``` + +## Prerequisites + +1. A way to receive traces, such as a running [Datadog Agent](https://docs.datadoghq.com/agent/). `ddtrace` sends traces to the Datadog Agent at `localhost:8126` by default. +2. Configure `ddtrace` through the standard mechanisms, for example the `DD_SERVICE`, `DD_ENV`, and `DD_VERSION` environment variables, or by running your application with the `ddtrace-run` command. See the [ddtrace documentation](https://ddtrace.readthedocs.io/en/stable/) for more details. + +## Usage + +Enable the `DatadogTracer` directly to trace any Haystack pipeline, without adding a component to it. Make sure to set the `HAYSTACK_CONTENT_TRACING_ENABLED` environment variable before importing any Haystack components. 
+ +```python +import os + +os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true" + +import ddtrace + +from haystack import Pipeline, tracing +from haystack.components.builders import ChatPromptBuilder +from haystack.components.generators.chat import OpenAIChatGenerator +from haystack.dataclasses import ChatMessage + +from haystack_integrations.tracing.datadog import DatadogTracer + +# Enable the Datadog tracer +tracing.enable_tracing(DatadogTracer(ddtrace.tracer)) + +pipe = Pipeline() +pipe.add_component("prompt_builder", ChatPromptBuilder()) +pipe.add_component("llm", OpenAIChatGenerator()) +pipe.connect("prompt_builder.prompt", "llm.messages") + +messages = [ + ChatMessage.from_system( + "Always respond in German even if some input data is in other languages.", + ), + ChatMessage.from_user("Tell me about {{location}}"), +] + +response = pipe.run( + data={ + "prompt_builder": { + "template_variables": {"location": "Berlin"}, + "template": messages, + }, + }, +) +print(response["llm"]["replies"][0]) +``` + +Each pipeline run produces a trace that includes the entire execution context, including prompts, completions, and metadata. You can then view the traces in your Datadog dashboard. + +## Alternative: the DatadogConnector component + +If you prefer to manage tracing as part of your pipeline definition (for example, so it serializes to YAML), you can add the `DatadogConnector` component instead. It enables the same Datadog tracing as soon as it is initialized. + +:::info +See the [`DatadogConnector` documentation page](../../pipeline-components/connectors/datadogconnector.mdx) for full usage examples, or check out the [integration page](https://haystack.deepset.ai/integrations/datadog). 
+::: diff --git a/docs-website/docs/development/tracing/langfuse.mdx b/docs-website/docs/development/tracing/langfuse.mdx new file mode 100644 index 0000000000..81c20f5330 --- /dev/null +++ b/docs-website/docs/development/tracing/langfuse.mdx @@ -0,0 +1,110 @@ +--- +title: "Langfuse" +id: tracing-langfuse +slug: "/tracing-langfuse" +description: "Learn how to trace your Haystack pipelines with Langfuse." +--- + +import ClickableImage from "@site/src/components/ClickableImage"; + +# Langfuse + +Learn how to trace your Haystack pipelines with Langfuse. + +
+ +| | | +| --- | --- | +| **Tracer class** | `LangfuseTracer` | +| **How to enable** | Enable the tracer with `tracing.enable_tracing(LangfuseTracer(langfuse))`, or add the `LangfuseConnector` component to your pipeline | +| **Content tracing** | Required. Set `HAYSTACK_CONTENT_TRACING_ENABLED` to `true` | +| **Package** | `langfuse-haystack` | +| **API reference** | [langfuse](/reference/integrations-langfuse) | +| **GitHub link** | https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/langfuse | + +
+ +## Overview + +Trace your Haystack pipelines with the [Langfuse](https://langfuse.com/) UI. Langfuse captures detailed information about pipeline runs, like API calls, context data, prompts, and more. Use it to monitor model performance such as token usage and cost, find areas for improvement, and create datasets from your pipeline executions. + +## Installation + +Install the `langfuse-haystack` package: + +```shell +pip install langfuse-haystack +``` + +## Prerequisites + +1. An active Langfuse [account](https://cloud.langfuse.com/). +2. Set the `LANGFUSE_SECRET_KEY` and `LANGFUSE_PUBLIC_KEY` environment variables with your Langfuse secret and public keys, found in your account profile. +3. Set the `HAYSTACK_CONTENT_TRACING_ENABLED` environment variable to `true` to enable tracing. + +:::info[Usage Notice] +To ensure proper tracing, always set environment variables before importing any Haystack components. This is crucial because Haystack initializes its internal tracing components during import. An even better practice is to set these environment variables in your shell before running the script. +::: + +## Usage + +Enable the `LangfuseTracer` directly to trace any Haystack pipeline, without adding a component to it. + +```python +import os + +os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com" +os.environ["LANGFUSE_SECRET_KEY"] = "" +os.environ["LANGFUSE_PUBLIC_KEY"] = "" +os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true" + +from langfuse import Langfuse + +from haystack import Pipeline, tracing +from haystack.components.builders import ChatPromptBuilder +from haystack.components.generators.chat import OpenAIChatGenerator +from haystack.dataclasses import ChatMessage + +from haystack_integrations.tracing.langfuse import LangfuseTracer + +# Enable the Langfuse tracer. The client reads your keys from the environment. 
+langfuse = Langfuse() +langfuse_tracer = LangfuseTracer(langfuse, name="Chat example") +tracing.enable_tracing(langfuse_tracer) + +pipe = Pipeline() +pipe.add_component("prompt_builder", ChatPromptBuilder()) +pipe.add_component("llm", OpenAIChatGenerator()) +pipe.connect("prompt_builder.prompt", "llm.messages") + +messages = [ + ChatMessage.from_system( + "Always respond in German even if some input data is in other languages.", + ), + ChatMessage.from_user("Tell me about {{location}}"), +] + +response = pipe.run( + data={ + "prompt_builder": { + "template_variables": {"location": "Berlin"}, + "template": messages, + }, + }, +) +print(response["llm"]["replies"][0]) + +# Flush any pending spans before the program exits +langfuse_tracer.flush() +``` + +Each pipeline run produces one trace that includes the entire execution context, including prompts, completions, and metadata. You can then view the trace in the Langfuse UI. + + +## Alternative: the LangfuseConnector component + +If you prefer to manage tracing as part of your pipeline definition, you can add the `LangfuseConnector` component instead. It enables the same Langfuse tracing, exposes the `trace_url` as an output, and supports a custom `SpanHandler` for advanced span processing. + +:::info +See the [`LangfuseConnector` documentation page](../../pipeline-components/connectors/langfuseconnector.mdx) for full usage examples and advanced span customization, or read the [blog post](https://haystack.deepset.ai/blog/langfuse-integration) for a complete walkthrough. 
+::: diff --git a/docs-website/docs/development/tracing/logging-tracer.mdx b/docs-website/docs/development/tracing/logging-tracer.mdx new file mode 100644 index 0000000000..fd4dedbaca --- /dev/null +++ b/docs-website/docs/development/tracing/logging-tracer.mdx @@ -0,0 +1,61 @@ +--- +title: "LoggingTracer" +id: tracing-logging-tracer +slug: "/tracing-logging-tracer" +description: "Learn how to inspect the data flowing through your Haystack pipelines in real time with the LoggingTracer." +--- + +import ClickableImage from "@site/src/components/ClickableImage"; + +# LoggingTracer + +Learn how to inspect the data flowing through your Haystack pipelines in real time with the `LoggingTracer`. + +
+ +| | | +| --- | --- | +| **Tracer class** | `LoggingTracer` | +| **How to enable** | `tracing.enable_tracing(LoggingTracer(...))` | +| **Content tracing** | Required to log inputs and outputs. Set `tracing.tracer.is_content_tracing_enabled = True` | +| **Package** | Built into Haystack | +| **GitHub link** | https://github.com/deepset-ai/haystack/blob/main/haystack/tracing/logging_tracer.py | + +
+ +## Overview + +Use Haystack's [`LoggingTracer`](https://github.com/deepset-ai/haystack/blob/main/haystack/tracing/logging_tracer.py) to log and inspect the data that's flowing through your pipeline in real time. + +This feature is particularly helpful during experimentation and prototyping, as you don’t need to set up any tracing backend beforehand. + +## Usage + +Here’s how you can enable this tracer. In this example, we are adding color tags (this is optional) to highlight the components' names and inputs: + +```python +import logging +from haystack import tracing +from haystack.tracing.logging_tracer import LoggingTracer + +logging.basicConfig( + format="%(levelname)s - %(name)s - %(message)s", + level=logging.WARNING, +) +logging.getLogger("haystack").setLevel(logging.DEBUG) + +tracing.tracer.is_content_tracing_enabled = ( + True # to enable tracing/logging content (inputs/outputs) +) +tracing.enable_tracing( + LoggingTracer( + tags_color_strings={ + "haystack.component.input": "\x1b[1;31m", + "haystack.component.name": "\x1b[1;34m", + }, + ), +) +``` + +Here’s what the resulting log would look like when a pipeline is run: + diff --git a/docs-website/docs/development/tracing/mlflow.mdx b/docs-website/docs/development/tracing/mlflow.mdx new file mode 100644 index 0000000000..f06a79f8c1 --- /dev/null +++ b/docs-website/docs/development/tracing/mlflow.mdx @@ -0,0 +1,51 @@ +--- +title: "MLflow" +id: tracing-mlflow +slug: "/tracing-mlflow" +description: "Learn how to trace your Haystack pipelines with MLflow." +--- + +# MLflow + +Learn how to trace your Haystack pipelines with MLflow. + +
+ +| | | +| --- | --- | +| **How to enable** | `mlflow.haystack.autolog()` | +| **Content tracing** | Captured automatically, including latencies, token usage, cost, and exceptions | +| **Package** | `mlflow` | +| **Integration guide** | https://haystack.deepset.ai/integrations/mlflow | + +
+ +## Overview + +[MLflow](https://mlflow.org/) is an open-source platform for managing the end-to-end machine learning and AI lifecycle. MLflow provides native tracing support for Haystack, so you can capture traces from all your pipelines and components with a single line of code. + +## Installation + +Install MLflow: + +```shell +pip install mlflow +``` + +## Usage + +Enable automatic tracing for all Haystack pipelines and components: + +```python +import mlflow + +mlflow.haystack.autolog() +# Optionally set an experiment name +mlflow.set_experiment("Haystack") +``` + +This automatically captures traces from all Haystack pipelines and components, including latencies, token usage, cost, and any exceptions. + +:::info +Check out the [MLflow Haystack integration guide](https://haystack.deepset.ai/integrations/mlflow) for a full walkthrough with examples. +::: diff --git a/docs-website/docs/development/tracing/opentelemetry.mdx b/docs-website/docs/development/tracing/opentelemetry.mdx new file mode 100644 index 0000000000..0b0beaac8a --- /dev/null +++ b/docs-website/docs/development/tracing/opentelemetry.mdx @@ -0,0 +1,172 @@ +--- +title: "OpenTelemetry" +id: tracing-opentelemetry +slug: "/tracing-opentelemetry" +description: "Learn how to trace your Haystack pipelines with OpenTelemetry." +--- + +import ClickableImage from "@site/src/components/ClickableImage"; + +# OpenTelemetry + +Learn how to trace your Haystack pipelines with OpenTelemetry. + +
+ +| | | +| --- | --- | +| **Tracer class** | `OpenTelemetryTracer` | +| **How to enable** | Auto-detected when the OpenTelemetry SDK is configured, or explicitly with `tracing.enable_tracing(OpenTelemetryTracer(tracer))` | +| **Content tracing** | Optional. Set `HAYSTACK_CONTENT_TRACING_ENABLED` to `true` to trace component inputs and outputs | +| **Package** | `opentelemetry-sdk`, `opentelemetry-exporter-otlp` | +| **GitHub link** | https://github.com/deepset-ai/haystack/blob/main/haystack/tracing/opentelemetry.py | + +
+ +## Overview + +[OpenTelemetry](https://opentelemetry.io/) is an open-source observability framework for collecting traces, metrics, and logs. Haystack provides out-of-the-box support for OpenTelemetry, so you can send traces of your pipeline runs to any OpenTelemetry-compatible backend. + +## Installation + +Install the [OpenTelemetry SDK](https://opentelemetry.io/docs/languages/python/): + +```shell +pip install opentelemetry-sdk +pip install opentelemetry-exporter-otlp +``` + +To add traces to even deeper levels of your pipelines, we recommend you check out [OpenTelemetry integrations](https://opentelemetry.io/ecosystem/registry/?s=python), such as: + +- [`urllib3` instrumentation](https://github.com/open-telemetry/opentelemetry-python-contrib/tree/main/instrumentation/opentelemetry-instrumentation-urllib3) for tracing HTTP requests in your pipeline, +- [OpenAI instrumentation](https://github.com/traceloop/openllmetry/tree/main/packages/opentelemetry-instrumentation-openai) for tracing OpenAI requests. + +## Usage + +There are two options for how to hook Haystack to the OpenTelemetry SDK. + +### Automated instrumentation + +Run your Haystack applications using OpenTelemetry’s [automated instrumentation](https://opentelemetry.io/docs/languages/python/getting-started/#instrumentation). Haystack will automatically detect the configured tracing backend and use it to send traces. 
+ +First, install the `OpenTelemetry` CLI: + +```shell +pip install opentelemetry-distro +``` + +Then, run your Haystack application using the OpenTelemetry SDK: + +```shell +opentelemetry-instrument \ + --traces_exporter console \ + --metrics_exporter console \ + --logs_exporter console \ + --service_name my-haystack-app \ + <command to run your Haystack pipeline> +``` + +### Configuring the tracing backend in Python + +Alternatively, configure the tracing backend directly in your Python code: + +```python +from haystack import tracing + +from opentelemetry import trace +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.sdk.resources import Resource +from opentelemetry.semconv.resource import ResourceAttributes + +# Service name is required for most backends +resource = Resource( + attributes={ + ResourceAttributes.SERVICE_NAME: "haystack", # Correct constant + }, +) + +tracer_provider = TracerProvider(resource=resource) +processor = BatchSpanProcessor( + OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces"), +) +tracer_provider.add_span_processor(processor) +trace.set_tracer_provider(tracer_provider) + +# Tell Haystack to auto-detect the configured tracer +import haystack.tracing + +haystack.tracing.auto_enable_tracing() + +# Explicitly tell Haystack to use your tracer +from haystack.tracing import OpenTelemetryTracer + +tracer = tracer_provider.get_tracer("my_application") +tracing.enable_tracing(OpenTelemetryTracer(tracer)) +``` + +## Visualizing Traces During Development + +Use [Jaeger](https://www.jaegertracing.io/docs/1.6/getting-started/) as a lightweight tracing backend for local pipeline development. This allows you to experiment with tracing without the need for a complex tracing backend. + + +1. Run the Jaeger container. 
This creates a tracing backend as well as a UI to visualize the traces: + + ```shell + docker run --rm -d --name jaeger \ + -e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \ + -p 6831:6831/udp \ + -p 6832:6832/udp \ + -p 5778:5778 \ + -p 16686:16686 \ + -p 4317:4317 \ + -p 4318:4318 \ + -p 14250:14250 \ + -p 14268:14268 \ + -p 14269:14269 \ + -p 9411:9411 \ + jaegertracing/all-in-one:latest + ``` +2. Install the OpenTelemetry SDK: + + ```shell + pip install opentelemetry-sdk + pip install opentelemetry-exporter-otlp + ``` +3. Configure `OpenTelemetry` to use the Jaeger backend: + + ```python + from opentelemetry.sdk.resources import Resource + from opentelemetry.semconv.resource import ResourceAttributes + + from opentelemetry import trace + from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import BatchSpanProcessor + + # Service name is required for most backends + resource = Resource(attributes={ + ResourceAttributes.SERVICE_NAME: "haystack" + }) + + tracer_provider = TracerProvider(resource=resource) + processor = BatchSpanProcessor(OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces")) + tracer_provider.add_span_processor(processor) + trace.set_tracer_provider(tracer_provider) + ``` +4. Tell Haystack to use OpenTelemetry for tracing: + + ```python + import haystack.tracing + + haystack.tracing.auto_enable_tracing() + ``` +5. Run your pipeline: + + ```python + ... + pipeline.run(...) + ... + ``` +6. Inspect the traces in the UI provided by Jaeger at [http://localhost:16686](http://localhost:16686/search). 
diff --git a/docs-website/docs/development/tracing/weave.mdx b/docs-website/docs/development/tracing/weave.mdx new file mode 100644 index 0000000000..49911730d4 --- /dev/null +++ b/docs-website/docs/development/tracing/weave.mdx @@ -0,0 +1,93 @@ +--- +title: "Weights & Biases Weave" +id: tracing-weave +slug: "/tracing-weave" +description: "Learn how to trace your Haystack pipelines with Weights & Biases Weave." +--- + +# Weights & Biases Weave + +Learn how to trace your Haystack pipelines with Weights & Biases Weave. + +
+ +| | | +| --- | --- | +| **Tracer class** | `WeaveTracer` | +| **How to enable** | Enable the tracer with `tracing.enable_tracing(WeaveTracer(project_name="..."))`, or add the `WeaveConnector` component to your pipeline | +| **Content tracing** | Required. Set `HAYSTACK_CONTENT_TRACING_ENABLED` to `true` | +| **Package** | `weave-haystack` | +| **API reference** | [Weave](/reference/integrations-weave) | +| **GitHub link** | https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/weave | + +
+ +## Overview + +Trace and visualize your pipeline execution in [Weights & Biases](https://wandb.ai/site/). Information captured by the Haystack tracing tool, such as API calls, context data, and prompts, is sent to Weights & Biases, where you can see the complete trace of your pipeline execution. + +## Installation + +Install the `weave-haystack` package: + +```shell +pip install weave-haystack +``` + +## Prerequisites + +1. A Weave account. You can sign up for free on the [Weights & Biases website](https://wandb.ai/site). +2. Set the `WANDB_API_KEY` environment variable with your Weights & Biases API key. Once logged in, you can find your API key on [your home page](https://wandb.ai/home). +3. Set the `HAYSTACK_CONTENT_TRACING_ENABLED` environment variable to `true`. + +## Usage + +Enable the `WeaveTracer` directly to trace any Haystack pipeline, without adding a component to it. The `project_name` is the name that will appear in your Weave project. + +```python +import os + +os.environ["HAYSTACK_CONTENT_TRACING_ENABLED"] = "true" + +from haystack import Pipeline, tracing +from haystack.components.builders import ChatPromptBuilder +from haystack.components.generators.chat import OpenAIChatGenerator +from haystack.dataclasses import ChatMessage + +from haystack_integrations.tracing.weave import WeaveTracer + +# Enable the Weave tracer +tracing.enable_tracing(WeaveTracer(project_name="test_pipeline")) + +pipe = Pipeline() +pipe.add_component("prompt_builder", ChatPromptBuilder()) +pipe.add_component("llm", OpenAIChatGenerator()) +pipe.connect("prompt_builder.prompt", "llm.messages") + +messages = [ + ChatMessage.from_system( + "Always respond in German even if some input data is in other languages.", + ), + ChatMessage.from_user("Tell me about {{location}}"), +] + +response = pipe.run( + data={ + "prompt_builder": { + "template_variables": {"location": "Berlin"}, + "template": messages, + }, + }, +) +print(response["llm"]["replies"][0]) +``` + +You can then see 
the complete trace for your pipeline at `https://wandb.ai/<user_name>/projects` under the project name you specified. + +## Alternative: the WeaveConnector component + +If you prefer to manage tracing as part of your pipeline definition, you can add the `WeaveConnector` component instead. It enables the same Weave tracing as soon as it runs. + +:::info +See the [`WeaveConnector` documentation page](../../pipeline-components/connectors/weaveconnector.mdx) for full usage examples. +::: diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 82c07dc521..a53e467578 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -751,7 +751,23 @@ export default { label: 'Development', items: [ 'development/logging', - 'development/tracing', + { + type: 'category', + label: 'Tracing', + link: { + type: 'doc', + id: 'development/tracing' + }, + items: [ + 'development/tracing/opentelemetry', + 'development/tracing/mlflow', + 'development/tracing/datadog', + 'development/tracing/langfuse', + 'development/tracing/weave', + 'development/tracing/logging-tracer', + 'development/tracing/custom-tracer', + ], + }, 'development/enabling-gpu-acceleration', 'development/hayhooks', { From 00ba1999d2b5de57e6615d355cf6bd20a4b50b64 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Tue, 16 Jun 2026 15:11:17 +0200 Subject: [PATCH 2/2] fix --- docs-website/docs/development/tracing/custom-tracer.mdx | 2 +- docs-website/docs/development/tracing/datadog.mdx | 2 +- docs-website/docs/development/tracing/langfuse.mdx | 2 +- docs-website/docs/development/tracing/logging-tracer.mdx | 2 +- docs-website/docs/development/tracing/mlflow.mdx | 2 +- docs-website/docs/development/tracing/opentelemetry.mdx | 2 +- docs-website/docs/development/tracing/weave.mdx | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs-website/docs/development/tracing/custom-tracer.mdx b/docs-website/docs/development/tracing/custom-tracer.mdx index ae40f71320..a28d0461d0 100644 --- 
a/docs-website/docs/development/tracing/custom-tracer.mdx +++ b/docs-website/docs/development/tracing/custom-tracer.mdx @@ -1,6 +1,6 @@ --- title: "Custom Tracer" -id: tracing-custom-tracer +id: custom-tracer slug: "/tracing-custom-tracer" description: "Learn how to connect Haystack to a custom tracing backend by implementing the Tracer interface." --- diff --git a/docs-website/docs/development/tracing/datadog.mdx b/docs-website/docs/development/tracing/datadog.mdx index 6c433d7f48..943f2d7ffd 100644 --- a/docs-website/docs/development/tracing/datadog.mdx +++ b/docs-website/docs/development/tracing/datadog.mdx @@ -1,6 +1,6 @@ --- title: "Datadog" -id: tracing-datadog +id: datadog slug: "/tracing-datadog" description: "Learn how to trace your Haystack pipelines with Datadog." --- diff --git a/docs-website/docs/development/tracing/langfuse.mdx b/docs-website/docs/development/tracing/langfuse.mdx index 81c20f5330..4604014410 100644 --- a/docs-website/docs/development/tracing/langfuse.mdx +++ b/docs-website/docs/development/tracing/langfuse.mdx @@ -1,6 +1,6 @@ --- title: "Langfuse" -id: tracing-langfuse +id: langfuse slug: "/tracing-langfuse" description: "Learn how to trace your Haystack pipelines with Langfuse." --- diff --git a/docs-website/docs/development/tracing/logging-tracer.mdx b/docs-website/docs/development/tracing/logging-tracer.mdx index fd4dedbaca..7529477377 100644 --- a/docs-website/docs/development/tracing/logging-tracer.mdx +++ b/docs-website/docs/development/tracing/logging-tracer.mdx @@ -1,6 +1,6 @@ --- title: "LoggingTracer" -id: tracing-logging-tracer +id: logging-tracer slug: "/tracing-logging-tracer" description: "Learn how to inspect the data flowing through your Haystack pipelines in real time with the LoggingTracer." 
--- diff --git a/docs-website/docs/development/tracing/mlflow.mdx b/docs-website/docs/development/tracing/mlflow.mdx index f06a79f8c1..0d4af73853 100644 --- a/docs-website/docs/development/tracing/mlflow.mdx +++ b/docs-website/docs/development/tracing/mlflow.mdx @@ -1,6 +1,6 @@ --- title: "MLflow" -id: tracing-mlflow +id: mlflow slug: "/tracing-mlflow" description: "Learn how to trace your Haystack pipelines with MLflow." --- diff --git a/docs-website/docs/development/tracing/opentelemetry.mdx b/docs-website/docs/development/tracing/opentelemetry.mdx index 0b0beaac8a..fe2d73e131 100644 --- a/docs-website/docs/development/tracing/opentelemetry.mdx +++ b/docs-website/docs/development/tracing/opentelemetry.mdx @@ -1,6 +1,6 @@ --- title: "OpenTelemetry" -id: tracing-opentelemetry +id: opentelemetry slug: "/tracing-opentelemetry" description: "Learn how to trace your Haystack pipelines with OpenTelemetry." --- diff --git a/docs-website/docs/development/tracing/weave.mdx b/docs-website/docs/development/tracing/weave.mdx index 49911730d4..f038648b83 100644 --- a/docs-website/docs/development/tracing/weave.mdx +++ b/docs-website/docs/development/tracing/weave.mdx @@ -1,6 +1,6 @@ --- title: "Weights & Biases Weave" -id: tracing-weave +id: weave slug: "/tracing-weave" description: "Learn how to trace your Haystack pipelines with Weights & Biases Weave." ---