Find average spread for a symbol
There are 16 US equity exchanges that provide trading for RegNMS symbols. Knowing the liquidity characteristics for each venue is necessary in understanding the microstructure of the equity markets. Cboe publishes the US Equities Market Volume Summary report that indicates the relative market volume share for each venue. Databento provides coverage for all equity exchanges except LTSE, which provides <0.01% of total market volume.
Overview
In this example, we will use the historical client to get the average spread for SPY across the different equity venues before and after an increase in volatility. We'll use the BBO-1s schema which provides the bid and ask price subsampled in 1-second intervals to calculate the spread in basis-points (BPS). We'll compare these pre-volatility and post-volatility figures for the different venues.
Example
import datetime as dt
import databento as db
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Set parameters
symbol = "SPY"
prevol_date = dt.date(2025, 4, 1) # Tuesday before volatility spike
postvol_date = dt.date(2025, 4, 8) # Tuesday after volatility spike
# Regular Trading Hours (RTH) for US equities
rth_start = dt.time(9, 30)
rth_end = dt.time(16, 0)
equity_datasets = [
"XNAS.ITCH", # Nasdaq
"XBOS.ITCH", # Nasdaq BX
"XPSX.ITCH", # Nasdaq PSX
"XNYS.PILLAR", # NYSE
"ARCX.PILLAR", # NYSE Arca
"XASE.PILLAR", # NYSE American
"XCHI.PILLAR", # NYSE Texas
"XCIS.TRADESBBO", # NYSE National
"MEMX.MEMOIR", # Members Exchange
"EPRL.DOM", # MIAX Pearl
"IEXG.TOPS", # IEX
"BATS.PITCH", # Cboe BZX
"BATY.PITCH", # Cboe BYX
"EDGA.PITCH", # Cboe EDGA
"EDGX.PITCH", # Cboe EDGX
# Databento does not provide coverage for LTSE (MIC: LTSE)
]
client = db.Historical(key="YOUR_API_KEY")
venue_map = {x["dataset"]: x["venue"] for x in client.metadata.list_publishers()}
def calculate_spread(
dataset: str,
symbol: str,
date: dt.date,
) -> pd.DataFrame:
"""
Calculate spread size in basis points (bps)
"""
df = client.timeseries.get_range(
dataset=dataset,
symbols=symbol,
schema="bbo-1s", # BBO-1s is required for this example
start=date,
).to_df(tz="America/New_York")
df = df.resample("1s").ffill().loc[rth_start:rth_end]
# Calculate spread in basis points
df["midpoint"] = (df["ask_px_00"] + df["bid_px_00"]) / 2
df["spread_abs"] = (df["ask_px_00"] - df["bid_px_00"]).clip(lower=0)
df["spread_bps"] = df["spread_abs"] / df["midpoint"] * 1e4
df["Venue"] = venue_map[dataset]
df["Date"] = df.index.date
return df[["Venue", "Date", "spread_bps"]]
df_list: list[pd.DataFrame] = []
for date in (prevol_date, postvol_date):
for dataset in equity_datasets:
df_list.append(calculate_spread(dataset, symbol, date))
df = pd.concat(df_list, ignore_index=True)
plt.figure(figsize=(16, 8))
ax = sns.boxplot(
data=df,
x="Venue",
y="spread_bps",
hue="Date",
order=df[df["Date"] == postvol_date].groupby("Venue")["spread_bps"].median().sort_values().index,
gap=0.1,
width=0.8,
showfliers=False,
)
# Set colors for each venue
n_venues = len(equity_datasets)
venue_colors = sns.color_palette("Set1", n_colors=n_venues)
for i, patch in enumerate(ax.patches):
patch.set_facecolor(venue_colors[i % n_venues])
# Color the post-volatility boxplots a lighter shade
if i >= n_venues:
patch.set_facecolor((*patch.get_facecolor()[:3], 0.4))
# Adjust legend
prevol_patch, postvol_patch = ax.get_legend().get_patches()
prevol_patch.set_facecolor(venue_colors[0])
postvol_patch.set_facecolor((venue_colors[0], 0.4))
plt.legend(handles=[prevol_patch, postvol_patch], labels=[f"{prevol_date}", f"{postvol_date}"])
plt.xticks(rotation=45)
plt.ylabel("Spread size (bps)")
plt.title(f"{symbol} spreads before and after April 2025 tariff crash")
plt.tight_layout()
plt.show()
The venues with the highest market share, such as NYSE Arca (ARCX
) and Nasdaq (XNAS
), have spreads that remain relatively tight even after the increase in volatility.
Venues with a low market share see the largest increase in spreads post-volatility.