Execution slippage and markouts
What is a markout?
A markout measures the price change within some arbitrary time interval before or after an event, such as a fill or a lit trading signal. Markouts are often used for transaction cost analysis (TCA) and determining execution quality, but they may also be used for alpha research.
For more information on markouts, check out the Databento Microstructure Guide.
Overview
In this example we'll use the Historical client to create markout curves.
This example will use the MBP-1 schema. This schema provides every trade and order book event that updates the top of book.
In this case we'll compute the markouts by midprice change, but it's also possible to compute them on a trade-to-mid or trade-to-trade basis. Markouts are usually computed at different time intervals and then averaged over multiple events to create a single curve.
Markout events
We'll calculate markout curves for the aggressive and passive side of every trade in NVDA for the following quantities:
- less than 100 shares
- 100 or more shares
- 200 or more shares
We'll use these curves to help understand adverse selection on passive limit orders and execution slippage on aggressive market orders to help determine order execution strategy.
Example
from pathlib import Path
import databento as db
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Number of log-spaced time offsets sampled on each side of the event
# (passed as `num` to `np.geomspace` in `plot_markouts`).
NUM_TIME_SAMPLES = 1000
# Largest offset from the event, in microseconds: 120 * 1e6 us = 120 s,
# i.e. the markout window spans -2 minutes to +2 minutes around each event.
WINDOW_LIMITS_MICROS = 120 * 1e6
def calc_markouts(
    midprice: pd.Series,
    events: pd.DataFrame,
    offsets: pd.TimedeltaIndex,
    aggressor_markout: bool = True,
) -> pd.DataFrame:
    """
    Average the markout curve over a set of trade events.

    For every event, the midprice is sampled (forward-filled) at
    ``event time + offset`` for each offset, and the event's ``price`` is
    subtracted. The difference is signed so that a positive value is a gain
    for the side being analysed, scaled by 1e4 (mils), and finally averaged
    across events while ignoring NaN samples.

    Parameters
    ----------
    midprice : pd.Series
        Midprice indexed by timestamp. It is looked up with
        ``reindex(..., method="ffill")``, which pandas only supports on a
        monotonic index.
    events : pd.DataFrame
        One row per event, indexed by event timestamp, with at least the
        columns ``price`` and ``side``. ``side`` is the aggressor side:
        ``"B"`` for a buy aggressor and ``"A"`` for a sell aggressor.
    offsets : pd.TimedeltaIndex
        Time offsets, relative to each event, at which to sample the midprice.
    aggressor_markout : bool, default True
        If true, compute PnL from the aggressor's point of view; otherwise
        from the passive counterparty's point of view.

    Returns
    -------
    pd.DataFrame
        A single-column frame with one row per offset, indexed by the offset
        expressed in microseconds.

    """
    # `side` is always the aggressor side. The raw difference
    # (future mid - fill price) is the PnL of a buyer, so it has to be negated
    # whenever the party being analysed is the seller:
    #   - aggressor view: the seller is the aggressor on side "A"
    #   - passive view:   the seller is the resting order hit by side "B"
    seller_side = "A" if aggressor_markout else "B"

    # One (len(offsets), 1) slab of markouts per event
    per_event = np.empty(shape=(len(events), len(offsets), 1))

    for row, (ts_event, event) in enumerate(events.iterrows()):
        future_mid = midprice.reindex(offsets + ts_event, method="ffill")
        sign = -1 if event["side"] == seller_side else 1
        pnl = sign * (future_mid - event["price"])
        pnl_in_mils = pnl * 1e4
        per_event[row] = pnl_in_mils.values.reshape(-1, 1)

    # Average across events (axis 0), skipping offsets with no midprice
    mean_curve = np.nanmean(per_event, 0)
    offsets_micros = offsets.total_seconds() * 1e6
    return pd.DataFrame(data=mean_curve, index=offsets_micros)
def plot_markouts(
    events: pd.DataFrame,
    midprice: pd.Series,
    lt_thresholds: tuple[int, ...] = (100,),
    gt_thresholds: tuple[int, ...] = (100, 200),
    aggressor_markout: bool = True,
) -> None:
    """
    Plot average markout curves for several order-size buckets.

    One curve is drawn for every threshold in `lt_thresholds` (events whose
    ``size`` is strictly below the threshold), followed by one curve for every
    threshold in `gt_thresholds` (events whose ``size`` is at or above the
    threshold). The figure is shown with `plt.show()`.

    Parameters
    ----------
    events : pd.DataFrame
        Trade events indexed by timestamp, with ``size``, ``price`` and
        ``side`` columns (the latter two are consumed by `calc_markouts`).
    midprice : pd.Series
        Midprice indexed by timestamp, forwarded to `calc_markouts`.
    lt_thresholds : tuple[int, ...], default (100,)
        Size thresholds for the "order size < threshold" buckets.
    gt_thresholds : tuple[int, ...], default (100, 200)
        Size thresholds for the "order size >= threshold" buckets.
    aggressor_markout : bool, default True
        If true, plot markouts for the aggressive side; otherwise for the
        passive side.

    """
    # Create log spaced intervals from -2 minutes -> +2 minutes, mirrored
    # around a zero offset at the event itself
    samples = np.geomspace(1e-3, WINDOW_LIMITS_MICROS, num=NUM_TIME_SAMPLES)
    offsets = pd.to_timedelta(
        np.append(-np.flip(samples), np.append(0, samples)),
        unit="microseconds",
    )

    # Describe every size bucket as (legend prefix, row mask); "less than"
    # buckets come first so the curves keep a stable plotting order
    sizes = events["size"]
    buckets = [
        (f"order size < {threshold:d}", sizes < threshold)
        for threshold in lt_thresholds
    ]
    buckets += [
        (f"order size >= {threshold:d}", sizes >= threshold)
        for threshold in gt_thresholds
    ]

    for label, mask in buckets:
        bucket_events = events[mask]
        markouts = calc_markouts(midprice, bucket_events, offsets, aggressor_markout)
        plt.plot(
            markouts,
            drawstyle="steps-post",
            label=f"{label} (n={len(bucket_events):,d})",
        )

    # Annotations
    plt.title(f"Markouts - {'Aggressive' if aggressor_markout else 'Passive'} orders")
    plt.xlabel("Time since event (μs)")
    plt.ylabel("PnL/share (mils)")
    # symlog keeps the log spacing on both sides while allowing negative offsets
    plt.xscale("symlog")
    plt.legend()
    plt.show()
if __name__ == "__main__":
    # Set parameters
    dataset = "XNAS.ITCH"
    symbol = "NVDA"
    start = "2024-05-21"

    # Create historical client
    client = db.Historical("$YOUR_API_KEY")

    # Download data or read an existing file
    mbp_data_file = Path(f"xnas-itch-{start.replace('-', '')}-mbp-1.dbn.zst")
    if not mbp_data_file.exists():
        mbp_data = client.timeseries.get_range(
            dataset=dataset,
            symbols=symbol,
            schema="mbp-1",
            start=start,
            path=mbp_data_file,
        )
    else:
        mbp_data = db.DBNStore.from_file(mbp_data_file)

    # Convert to DataFrame
    df = mbp_data.to_df()
    df = df.dropna().set_index("ts_event")

    # We will use the midprice to calculate markouts; keep the last top-of-book
    # state for each timestamp so the index is unique
    midprice_s = df[["bid_px_00", "ask_px_00"]].mean(axis=1).groupby("ts_event").last()

    # Filter for trades with a known aggressor side (drops side == "N" records,
    # e.g. cross trades). Take an explicit copy: a column is added below, and
    # assigning into a filtered slice of `df` raises SettingWithCopyWarning.
    trades = df[(df["action"] == "T") & (df["side"] != "N")].copy()

    # Plot markouts for limit orders (each fill is a separate passive order)
    plot_markouts(trades, midprice_s, aggressor_markout=False)

    # Fills sharing a timestamp are treated as one aggressive order: sum the
    # sizes and use the size-weighted average fill price
    trades["cost"] = trades["price"] * trades["size"]
    agg_trades = trades.groupby("ts_event").agg({
        "size": "sum",
        "side": "first",
        "cost": "sum",
    })
    agg_trades["price"] = agg_trades["cost"] / agg_trades["size"]

    # Plot markouts for market orders
    plot_markouts(agg_trades, midprice_s, aggressor_markout=True)
Result
Observe that crossing the spread with smaller orders exhibits less market impact, which agrees with our general intuition.
We see the markouts are initially positive after the fill, as passive orders collect the spread and don't incur instantaneous slippage.
However, the markouts are negative after roughly 1 second, showing the strong effects of adverse selection on passive orders of all sizes.