Calculate synthetic NBBO from prop feeds

Overview

In this example, we will calculate a synthetic NBBO by taking the best bid and offer seen across the different exchanges. Unlike the official SIP NBBO, which ignores odd lots and is calculated by CTA and UTP in their datacenters, this synthetic NBBO is derived from the direct proprietary ("prop") feeds. Databento provides top-of-book coverage for all equity exchanges (except LTSE, which accounts for only <0.01% of total market volume). Databento captures all equity data in the NY4 datacenter with PTP timestamps. See our timestamping guide for more information.

Design

We'll use the MBP-1 schema and download data for each of the equity exchanges. Next, we'll process the records sequentially in order of ts_recv, which is the timestamp at which Databento received the data from the exchange. We'll calculate the synthetic NBBO by taking the best bid and offer across these exchanges. After that, we'll plot the result over a 100-millisecond window to take a closer look at how the synthetic NBBO reacts when the price moves.

Example

Python

      
    
import os
from collections import defaultdict
from dataclasses import dataclass, field
from heapq import merge
import databento as db
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd

@dataclass(slots=True)
class PriceLevel:
    price: float
    size: int = 0
    count: int = 0
    update_time: int = 0

    def __str__(self) -> str:
        return f"{self.size:4} @ {self.price:6.2f} | {self.count:2} order(s)"

    @staticmethod
    def _bid_sort(r) -> tuple[float, int, int]:
        return r.price, r.size, -r.update_time

    @staticmethod
    def _ask_sort(r) -> tuple[float, int, int]:
        return -r.price, r.size, -r.update_time

@dataclass(slots=True)
class MbpBook:
    """Top-of-book state (best bid and best offer) for one book.

    Either side is ``None`` until a message carrying a defined price for that
    side has been applied.
    """

    offer: PriceLevel | None = None
    bid: PriceLevel | None = None

    def bbo(self) -> tuple[PriceLevel | None, PriceLevel | None]:
        """Return the current ``(bid, offer)`` pair."""
        return (self.bid, self.offer)

    def apply(self, mbp1: db.MBP1Msg) -> None:
        """Overwrite both sides of the book from the first level of ``mbp1``.

        A side whose raw price equals ``db.UNDEF_PRICE`` is cleared to
        ``None``; otherwise it is replaced by a new ``PriceLevel`` stamped
        with the message's ``ts_recv``.
        """
        top = mbp1.levels[0]
        received_at = mbp1.ts_recv

        # The raw price is compared against the sentinel, while the stored
        # level uses the message's "pretty" price.
        self.bid = (
            None
            if top.bid_px == db.UNDEF_PRICE
            else PriceLevel(top.pretty_bid_px, top.bid_sz, top.bid_ct, received_at)
        )
        self.offer = (
            None
            if top.ask_px == db.UNDEF_PRICE
            else PriceLevel(top.pretty_ask_px, top.ask_sz, top.ask_ct, received_at)
        )

@dataclass(slots=True)
class MbpMarket:
    """Top-of-book books keyed by instrument and then by publisher.

    ``books[instrument_id][publisher_id]`` is the ``MbpBook`` for one
    instrument as seen from one publisher. Both mapping levels are
    ``defaultdict``s, so indexing an unseen key creates an empty book.
    """

    books: defaultdict[int, defaultdict[int, MbpBook]] = field(
        default_factory=lambda: defaultdict(lambda: defaultdict(MbpBook)),
    )

    def get_book(self, instrument_id: int, publisher_id: int) -> MbpBook:
        """Return the book for one instrument/publisher, creating it if absent."""
        return self.books[instrument_id][publisher_id]

    def bbo(
        self,
        instrument_id: int,
        publisher_id: int,
    ) -> tuple[PriceLevel | None, PriceLevel | None]:
        """Return ``(bid, offer)`` for one instrument on one publisher."""
        return self.books[instrument_id][publisher_id].bbo()

    def _quoted_sides(
        self,
        instrument_id: int,
    ) -> tuple[list[PriceLevel], list[PriceLevel]]:
        """Collect the non-empty bids and offers of every book for an instrument.

        Returns two (possibly empty) lists in book insertion order.
        """
        bids: list[PriceLevel] = []
        offers: list[PriceLevel] = []
        # .get() instead of indexing: a read-only query should not insert an
        # empty entry into the defaultdict for an instrument never seen.
        for book in self.books.get(instrument_id, {}).values():
            bid, offer = book.bbo()
            if bid is not None:
                bids.append(bid)
            if offer is not None:
                offers.append(offer)
        return bids, offers

    @staticmethod
    def _aggregate(levels: list[PriceLevel], reducer) -> PriceLevel | None:
        """Merge all levels quoting the ``reducer``-best price into one level.

        ``reducer`` is ``max`` for bids and ``min`` for offers. Returns
        ``None`` when ``levels`` is empty.
        """
        if not levels:
            return None
        best_price = reducer(level.price for level in levels)
        # Exact float comparison is intentional: best_price is one of the
        # very values being compared, not a computed quantity.
        at_best = [level for level in levels if level.price == best_price]
        return PriceLevel(
            price=best_price,
            size=sum(level.size for level in at_best),
            count=sum(level.count for level in at_best),
        )

    def aggregated_bbo(
        self,
        instrument_id: int,
    ) -> tuple[PriceLevel | None, PriceLevel | None]:
        """Calculate the aggregated BBO across all venues.

        The best bid is the highest bid price and the best offer the lowest
        offer price over all books of ``instrument_id``; size and order count
        are summed over every book quoting that price. The returned levels
        keep the default ``update_time`` of 0.

        Always returns a 2-tuple ``(bid, offer)``; a side is ``None`` when no
        book has a quote on it, including when the instrument has no books.
        """
        bids, offers = self._quoted_sides(instrument_id)
        return self._aggregate(bids, max), self._aggregate(offers, min)

    def consolidated_bbo(
        self,
        instrument_id: int,
    ) -> tuple[PriceLevel | None, PriceLevel | None]:
        """Return the single best bid and best offer across all books.

        Unlike ``aggregated_bbo`` this returns existing per-book levels
        rather than summed ones. Ties are resolved by ``PriceLevel._bid_sort``
        and ``PriceLevel._ask_sort``: better price, then larger size, then
        earlier ``update_time``. A side is ``None`` when no book has a quote
        on it, including when the instrument has no books.
        """
        bids, offers = self._quoted_sides(instrument_id)

        best_bid = max(bids, key=PriceLevel._bid_sort, default=None)
        best_offer = max(offers, key=PriceLevel._ask_sort, default=None)

        return best_bid, best_offer

    def apply(self, msg: db.MBP1Msg) -> None:
        """Route ``msg`` to the book for its instrument and publisher."""
        book = self.books[msg.instrument_id][msg.publisher_id]
        book.apply(msg)

if __name__ == "__main__":
    # One proprietary-feed dataset per exchange.
    equity_datasets = [
        "XNAS.ITCH",  # Nasdaq
        "XBOS.ITCH",  # Nasdaq BX
        "XPSX.ITCH",  # Nasdaq PSX
        "XNYS.PILLAR",  # NYSE
        "ARCX.PILLAR",  # NYSE Arca
        "XASE.PILLAR",  # NYSE American
        "XCHI.PILLAR",  # NYSE Texas
        "XCIS.TRADESBBO",  # NYSE National
        "MEMX.MEMOIR",  # Members Exchange
        "EPRL.DOM",  # MIAX Pearl
        "IEXG.TOPS",  # IEX
        "BATS.PITCH",  # Cboe BZX
        "BATY.PITCH",  # Cboe BYX
        "EDGA.PITCH",  # Cboe EDGA
        "EDGX.PITCH",  # Cboe EDGX
    ]

    symbol = "NVDA"
    schema = "mbp-1"
    start = pd.Timestamp(2025, 6, 17, 9, 30, tz="US/Eastern")
    end = pd.Timestamp(2025, 6, 17, 10, 0, tz="US/Eastern")

    client = db.Historical(key="$YOUR_API_KEY")

    # Load every dataset, reusing a local file when one already exists.
    stores: dict[str, db.DBNStore] = {}
    for dataset in equity_datasets:
        dataset_name = dataset.replace(".", "-").lower()
        data_path = f"{dataset_name}-{symbol}-{start.date().isoformat().replace('-', '')}.{schema}.dbn.zst"
        if not os.path.exists(data_path):
            # Not cached yet: request the range and pass the same path so
            # that (presumably) later runs find the file on disk.
            stores[dataset] = client.timeseries.get_range(
                dataset=dataset,
                symbols=symbol,
                schema=schema,
                start=start,
                end=end,
                path=data_path,
            )
            continue
        stores[dataset] = db.DBNStore.from_file(data_path)

    # Interleave the per-dataset streams into one stream ordered by ts_recv.
    # heapq.merge assumes each input is already sorted by that key.
    records = merge(*stores.values(), key=lambda rec: rec.ts_recv)

    # Replay the records, sampling the consolidated BBO after each update.
    nan = float("Nan")
    rows: list[tuple[pd.Timestamp, float, float]] = []
    market = MbpMarket()
    for record in records:
        market.apply(record)
        top_bid, top_offer = market.consolidated_bbo(record.instrument_id)
        bid_px = nan if top_bid is None else top_bid.price
        offer_px = nan if top_offer is None else top_offer.price
        rows.append((record.pretty_ts_recv, bid_px, offer_px))

    # One row per processed record, indexed by receive timestamp.
    quotes = pd.DataFrame(rows, columns=["Timestamp", "Bid", "Offer"]).set_index("Timestamp")
    quotes["is_crossed"] = quotes["Bid"] >= quotes["Offer"]

    # Zoom in on a 100 ms slice of time when the book is crossed.
    start_time = pd.Timestamp(2025, 6, 17, 9, 56, 31, 650000, tz="US/Eastern")
    end_time = pd.Timestamp(2025, 6, 17, 9, 56, 31, 750000, tz="US/Eastern")
    window = quotes.loc[start_time:end_time]

    fig, ax = plt.subplots(figsize=(11, 6))

    # Keyword arguments common to both shaded regions.
    shading = {"alpha": 0.2, "linewidth": 0, "step": "post"}

    # Green between bid and offer while the book is not crossed ...
    ax.fill_between(
        window.index,
        window["Bid"],
        window["Offer"],
        where=~window["is_crossed"],
        color="green",
        label="Normal market",
        **shading,
    )

    # ... and red between offer and bid while it is crossed.
    ax.fill_between(
        window.index,
        window["Offer"],
        window["Bid"],
        where=window["is_crossed"],
        color="red",
        label="Crossed market",
        **shading,
    )

    # Draw each quote as a horizontal segment lasting until the next update;
    # the final row has no successor, so it is left out.
    segment_starts = window.index[:-1]
    segment_ends = window.index[1:]
    for col, color in [("Offer", "C1"), ("Bid", "C0")]:
        ax.hlines(
            y=window[col].iloc[:-1],
            xmin=segment_starts,
            xmax=segment_ends,
            colors=color,
            label=col,
        )

    ax.xaxis.set_major_formatter(mdates.DateFormatter("%H:%M:%S.%f", tz="US/Eastern"))
    ax.set_ylabel("Price (USD)")
    ax.set_xlabel("Timestamp (ET)")
    ax.set_title(f"{symbol} synthetic NBBO")
    ax.legend()
    fig.tight_layout()
    plt.show()

Results

Notice that the synthetic NBBO may occasionally appear crossed, unlike the regulatory NBBO published by the SIPs. This is expected behavior due to two factors: first, proprietary feeds include odd lot quotations, which are excluded from SIP NBBO calculations; second, the feeds originate from different data centers, resulting in receive-time deltas. These characteristics can temporarily produce crossed markets, but they also enable the construction of a more predictive microprice.

Consolidated BBO