Last active
December 12, 2024 08:50
-
-
Save ezralanglois/2647a183fab2a8925de1c15e998e2987 to your computer and use it in GitHub Desktop.
Plot Q30 per cycle using the Illumina InterOp library
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Tested with | |
# - Anaconda Python 3.7 | |
# - Illumina InterOp 1.1.12 | |
import interop.py_interop_plot as interop_plot | |
import interop.py_interop_run as interop_run | |
import interop.py_interop_run_metrics as interop_run_metrics | |
import pandas as pd | |
run_folder = "" | |
metric = "Q30Percent" | |
# List of metrics can be found with | |
# metrics = interop_run.string_vector() | |
# interop_run.list_metric_type(metrics) | |
# [metrics[m] for m in range(metrics.size())] | |
# | |
# However only a subset can be plotted by cycle; this can determined with the following | |
# | |
# feature = interop_run.parse_metric_feature_type('CycleFeature') | |
# metrics = interop_run.string_vector() | |
# interop_run.list_metric_type(metrics) | |
# [metrics[m] for m in range(metrics.size()) if (interop_run_metrics.to_feature(interop_run.parse_metric_type(metrics[m])) & feature) == feature ] | |
# | |
# List can be found here: | |
# https://github.com/Illumina/interop/blob/master/interop/constants/enums.h#L41 | |
# | |
# Caveat 1: not all metrics are available on every platform. You will get NaN if the metric is not available. | |
# For example, Signal To Noise is only available on MiSeq | |
# | |
# Caveat 2: Certain plots are box plots/candle stick plots. Any plot that returns multiple curves is an average, so only | |
# x/y are defined; e.g. Intensity returns a plot per channel. The interface for these plots is the same. | |
# Load only the metrics necessary to plot % >= Q30 | |
metric = interop_run.parse_metric_type(metric) | |
valid_to_load = interop_run.uchar_vector(interop_run.MetricCount, 0) | |
interop_run_metrics.list_metrics_to_load_by_type(metric, valid_to_load) | |
run_metrics = interop_run_metrics.run_metrics() | |
run_metrics.read(run_folder, valid_to_load) | |
# Plot everything | |
options = interop_plot.filter_options(run_metrics.run_info().flowcell().naming_method()) | |
plot_data = interop_plot.candle_stick_plot_data() | |
interop_plot.plot_by_cycle(run_metrics, metric, options, plot_data) | |
if plot_data.size() == 1: | |
# Convert candlestick (kind of box plot) to a pandas DataFrame | |
# https://en.wikipedia.org/wiki/Box_plot | |
# P25 = 25th percentile | |
# P50 = median | |
# P75 = 75th percentile | |
# Lower Whisker = lowest data point excluding any outliers or p25 - 1.5 * (p75-p25) | |
# Upper Whisker = highest data point excluding any outliers or p75 + 1.5 * (p75-p25) | |
columns = ["x", "lower", "p25", "p50", "p75", "upper"] | |
data = [] | |
for j in range(plot_data.at(0).size()): | |
data_point = plot_data.at(0).at(j) | |
data.append([data_point.x() | |
, data_point.lower() | |
, data_point.p25() | |
, data_point.p50() | |
, data_point.p75() | |
, data_point.upper()]) | |
elif plot_data.size() > 1: | |
# Convert several x/y curves where the tiles are averaged to a pandas DataFrame | |
columns = ["x", "y", "label"] | |
data = [] | |
for i in range(plot_data.size()): | |
title = plot_data.at(i).title() | |
for j in range(plot_data.at(i).size()): | |
data_point = plot_data.at(i).at(j) | |
data.append([data_point.x(), data_point.y(), title]) | |
df = pd.DataFrame(data=data, columns=columns) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment