Calculate synthetic NBBO from prop feeds
Overview
In this example, we will calculate a synthetic NBBO by taking the best bid and offer seen across the different exchanges. Unlike the official SIP NBBO, which ignores odd lots and is calculated by CTA and UTP in their datacenters, this synthetic NBBO is derived from the direct prop feeds. Databento provides top-of-book coverage for all equity exchanges (except LTSE, which accounts for less than 0.01% of total market volume). Databento captures all equity data in the NY4 datacenter with PTP timestamps. See our timestamping guide for more information.
Design
We'll use the MBP-1 schema and download data for the different equity exchanges. Next, we'll process the records sequentially in order of ts_recv, which is the timestamp when Databento received the data from the exchange. At each update, we'll calculate the synthetic NBBO by taking the best bid and offer across these exchanges. After that, we'll plot this over a 100-millisecond window to take a closer look at how the synthetic NBBO reacts when the price moves.
Example
import os
from collections import defaultdict
from dataclasses import dataclass, field
from heapq import merge
from typing import Union
import databento as db
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
@dataclass
class PriceLevel:
    """
    One side of a top-of-book quote: a price with its size and order count.

    The two static helpers produce sort keys so that ``max(levels, key=...)``
    selects the most competitive level on the corresponding side.
    """

    # Quoted price of the level.
    price: float
    # Total size resting at the price.
    size: int = 0
    # Number of orders resting at the price.
    count: int = 0
    # Timestamp of the update that produced this level (0 when not set).
    update_time: int = 0

    def __str__(self) -> str:
        """Render the level as ``size @ price | n order(s)``."""
        return f"{self.size:4} @ {self.price:6.2f} | {self.count:2} order(s)"

    @staticmethod
    def _bid_sort(level) -> tuple[float, int, int]:
        """
        Return a key that ranks bids.

        The greatest key has the highest price; ties go to the larger size and
        then to the smaller (earlier) ``update_time``.
        """
        return (level.price, level.size, -level.update_time)

    @staticmethod
    def _ask_sort(level) -> tuple[float, int, int]:
        """
        Return a key that ranks offers.

        The price is negated, so the greatest key has the lowest price; ties go
        to the larger size and then to the smaller (earlier) ``update_time``.
        """
        return (-level.price, level.size, -level.update_time)
@dataclass
class MbpBook:
    """
    Top-of-book state for a single instrument on a single publisher.

    Each side holds the most recently applied level, or ``None`` when the
    latest message carried an undefined price for that side.
    """

    # Best offer from the latest update; None when that side is undefined.
    offer: Union[PriceLevel, None] = None
    # Best bid from the latest update; None when that side is undefined.
    bid: Union[PriceLevel, None] = None

    def bbo(self) -> tuple[Union[PriceLevel, None], Union[PriceLevel, None]]:
        """Return the current ``(bid, offer)`` pair."""
        return (self.bid, self.offer)

    def apply(self, mbp1: db.MBP1Msg) -> None:
        """
        Replace both sides of the book with the top level of ``mbp1``.

        A side whose raw price equals ``db.UNDEF_PRICE`` is cleared to ``None``.
        Otherwise a new ``PriceLevel`` is built from the message's ``pretty_*``
        price, its size and order count, and stamped with ``ts_recv``.
        """
        top = mbp1.levels[0]
        received_at = mbp1.ts_recv

        self.bid = (
            None
            if top.bid_px == db.UNDEF_PRICE
            else PriceLevel(top.pretty_bid_px, top.bid_sz, top.bid_ct, received_at)
        )
        self.offer = (
            None
            if top.ask_px == db.UNDEF_PRICE
            else PriceLevel(top.pretty_ask_px, top.ask_sz, top.ask_ct, received_at)
        )
@dataclass()
class MbpMarket:
    """
    Top-of-book state for many instruments across many publishers.

    ``books`` maps ``instrument_id -> publisher_id -> MbpBook``. Both levels
    are ``defaultdict`` instances, so indexing an unseen key creates an empty
    entry.
    """

    books: defaultdict[int, defaultdict[int, MbpBook]] = field(
        default_factory=lambda: defaultdict(lambda: defaultdict(MbpBook)),
    )

    def get_book(self, instrument_id: int, publisher_id: int) -> MbpBook:
        """Return the book for one instrument on one publisher."""
        return self.books[instrument_id][publisher_id]

    def bbo(
        self,
        instrument_id: int,
        publisher_id: int,
    ) -> tuple[Union[PriceLevel, None], Union[PriceLevel, None]]:
        """Return the ``(bid, offer)`` of one instrument on one publisher."""
        return self.books[instrument_id][publisher_id].bbo()

    def _quoted_levels(
        self,
        instrument_id: int,
    ) -> tuple[list[PriceLevel], list[PriceLevel]]:
        """Collect the non-empty bids and offers of every publisher's book."""
        bids: list[PriceLevel] = []
        offers: list[PriceLevel] = []
        for book in self.books[instrument_id].values():
            bid, offer = book.bbo()
            if bid is not None:
                bids.append(bid)
            if offer is not None:
                offers.append(offer)
        return bids, offers

    @staticmethod
    def _aggregate(levels, pick_best) -> Union[PriceLevel, None]:
        """Merge all levels quoted at the best price, or return None if empty."""
        if not levels:
            return None
        best_price = pick_best(level.price for level in levels)
        at_best = [level for level in levels if level.price == best_price]
        return PriceLevel(
            price=best_price,
            size=sum(level.size for level in at_best),
            count=sum(level.count for level in at_best),
        )

    def aggregated_bbo(
        self,
        instrument_id: int,
    ) -> tuple[Union[PriceLevel, None], Union[PriceLevel, None]]:
        """
        Calculate the aggregated BBO across all venues.

        The best bid is the highest bid price and the best offer is the lowest
        offer price. Size and order count are summed over every publisher
        quoting that price; ``update_time`` keeps its default of 0.

        A side with no quotes is ``None``. An instrument with no books at all
        yields ``(None, None)`` rather than raising ``IndexError``.
        """
        bids, offers = self._quoted_levels(instrument_id)
        return self._aggregate(bids, max), self._aggregate(offers, min)

    def consolidated_bbo(
        self,
        instrument_id: int,
    ) -> tuple[Union[PriceLevel, None], Union[PriceLevel, None]]:
        """
        Return the single best bid and best offer across all venues.

        Unlike ``aggregated_bbo``, the returned levels are the winning
        publishers' own ``PriceLevel`` objects. Winners are chosen with
        ``PriceLevel._bid_sort`` and ``PriceLevel._ask_sort``.

        A side with no quotes is ``None``. An instrument with no books at all
        yields ``(None, None)`` rather than raising ``ValueError`` from
        unpacking an empty ``zip``.
        """
        bids, offers = self._quoted_levels(instrument_id)
        best_bid = max(bids, key=PriceLevel._bid_sort, default=None)
        best_offer = max(offers, key=PriceLevel._ask_sort, default=None)
        return best_bid, best_offer

    def apply(self, msg: db.MBP1Msg) -> None:
        """Route ``msg`` to the book keyed by its instrument and publisher IDs."""
        book = self.books[msg.instrument_id][msg.publisher_id]
        book.apply(msg)
if __name__ == "__main__":
    # One proprietary-feed dataset per equity venue
    equity_datasets = [
        "XNAS.ITCH",  # Nasdaq
        "XBOS.ITCH",  # Nasdaq BX
        "XPSX.ITCH",  # Nasdaq PSX
        "XNYS.PILLAR",  # NYSE
        "ARCX.PILLAR",  # NYSE Arca
        "XASE.PILLAR",  # NYSE American
        "XCHI.PILLAR",  # NYSE Texas
        "XCIS.TRADESBBO",  # NYSE National
        "MEMX.MEMOIR",  # Members Exchange
        "EPRL.DOM",  # MIAX Pearl
        "IEXG.TOPS",  # IEX
        "BATS.PITCH",  # Cboe BZX
        "BATY.PITCH",  # Cboe BYX
        "EDGA.PITCH",  # Cboe EDGA
        "EDGX.PITCH",  # Cboe EDGX
    ]

    symbol = "NVDA"
    schema = "mbp-1"
    start = pd.Timestamp(2025, 6, 17, 9, 30, tz="US/Eastern")
    end = pd.Timestamp(2025, 6, 17, 10, 0, tz="US/Eastern")

    client = db.Historical(key="$YOUR_API_KEY")

    # Load every dataset, downloading only when there is no cached file on disk
    dataset_data_dict: dict[str, db.DBNStore] = {}
    for dataset in equity_datasets:
        dataset_name = dataset.replace(".", "-").lower()
        data_path = f"{dataset_name}-{symbol}-{start.date().isoformat().replace('-', '')}.{schema}.dbn.zst"

        if not os.path.exists(data_path):
            # Passing `path` also writes the download to disk for later runs
            store = client.timeseries.get_range(
                dataset=dataset,
                start=start,
                end=end,
                symbols=symbol,
                schema=schema,
                path=data_path,
            )
        else:
            store = db.DBNStore.from_file(data_path)

        dataset_data_dict[dataset] = store

    # Interleave the per-venue streams into a single stream ordered by ts_recv
    merged_records = merge(*dataset_data_dict.values(), key=lambda rec: rec.ts_recv)

    # Replay the records, sampling the consolidated BBO after every update
    cbbo_list: list[tuple[pd.Timestamp, float, float]] = []
    market = MbpMarket()
    for record in merged_records:
        market.apply(record)
        best_bid, best_offer = market.consolidated_bbo(record.instrument_id)

        # A missing side is recorded as NaN
        bid_px = float("Nan") if best_bid is None else best_bid.price
        offer_px = float("Nan") if best_offer is None else best_offer.price
        cbbo_list.append((record.pretty_ts_recv, bid_px, offer_px))

    # Build a time-indexed frame and flag samples where the bid meets or exceeds the offer
    df = pd.DataFrame(cbbo_list, columns=["Timestamp", "Bid", "Offer"]).set_index("Timestamp")
    df["is_crossed"] = df["Bid"] >= df["Offer"]

    # Zoom in on a 100-millisecond slice in which the book is crossed
    start_time = pd.Timestamp(2025, 6, 17, 9, 56, 31, 650000, tz="US/Eastern")
    end_time = pd.Timestamp(2025, 6, 17, 9, 56, 31, 750000, tz="US/Eastern")
    df = df.loc[start_time:end_time]

    fig, ax = plt.subplots(figsize=(11, 6))

    # Green shading between bid and offer while the book is not crossed
    ax.fill_between(
        df.index,
        df["Bid"],
        df["Offer"],
        where=~df["is_crossed"],
        alpha=0.2,
        linewidth=0,
        color="green",
        step="post",
        label="Normal market",
    )

    # Red shading between offer and bid while the book is crossed
    ax.fill_between(
        df.index,
        df["Offer"],
        df["Bid"],
        where=df["is_crossed"],
        alpha=0.2,
        linewidth=0,
        color="red",
        step="post",
        label="Crossed market",
    )

    # Draw each quote as a horizontal segment that lasts until the next sample
    for col, color in (("Offer", "C1"), ("Bid", "C0")):
        ax.hlines(
            y=df[col][:-1],
            xmin=df.index[:-1],
            xmax=df.index[1:],
            colors=color,
            label=col,
        )

    ax.xaxis.set_major_formatter(mdates.DateFormatter("%H:%M:%S.%f", tz="US/Eastern"))
    ax.set_ylabel("Price (USD)")
    ax.set_xlabel("Timestamp (ET)")
    ax.set_title(f"{symbol} synthetic NBBO")
    ax.legend()
    fig.tight_layout()
    plt.show()
Results
Notice that the synthetic NBBO may occasionally appear crossed, unlike the regulatory NBBO published by the SIPs. This is expected behavior and is due to two factors: first, proprietary feeds include odd-lot quotations, which are excluded from SIP NBBO calculations; second, the feeds originate from different data centers, resulting in receive-time deltas. These characteristics can temporarily produce crossed markets, but they also enable the construction of a more predictive microprice.