Plot Tips
In [1]:
Copied!
# !pip install pygenomeviz
# !pip install pygenomeviz
Subtrack¶
Users can add subtracks to a FeatureTrack and plot user-defined graphs to matplotlib Axes of the subtracks (Option for users familiar with matplotlib)
In [2]:
Copied!
from pygenomeviz import GenomeViz
import numpy as np
np.random.seed(0)
genome_list = [
dict(name="genome 01", size=1000, features=((150, 300, 1), (500, 700, -1), (750, 950, 1))),
dict(name="genome 02", size=1300, features=((50, 200, 1), (350, 450, 1), (700, 900, -1), (950, 1150, -1))),
dict(name="genome 03", size=1200, features=((150, 300, 1), (350, 450, -1), (500, 700, -1), (700, 900, -1))),
]
gv = GenomeViz(fig_track_height=0.7, feature_track_ratio=0.5, track_align_type="center")
gv.set_scale_bar()
for genome in genome_list:
name, size, features = genome["name"], genome["size"], genome["features"]
track = gv.add_feature_track(name, size)
track.add_subtrack(ylim=(0, 100)) # Add subtrack
for idx, feature in enumerate(features, 1):
start, end, strand = feature
track.add_feature(start, end, strand, plotstyle="bigarrow", lw=1)
fig = gv.plotfig()
# Plot user-defined graph to subtrack axes
for track in gv.feature_tracks:
subtrack = track.get_subtrack()
for segment in track.segments:
# Plot y = (50 - 100) random values to subtrack
# Tranform segment-level x coordinate to track-level coordinate
x = np.arange(segment.start, segment.end, 10)
x = segment.transform_coord(x)
y = np.random.randint(50, 100, len(x))
subtrack.ax.fill_between(x, y, color="grey")
# fig.savefig("result.png")
from pygenomeviz import GenomeViz
import numpy as np
np.random.seed(0)
genome_list = [
dict(name="genome 01", size=1000, features=((150, 300, 1), (500, 700, -1), (750, 950, 1))),
dict(name="genome 02", size=1300, features=((50, 200, 1), (350, 450, 1), (700, 900, -1), (950, 1150, -1))),
dict(name="genome 03", size=1200, features=((150, 300, 1), (350, 450, -1), (500, 700, -1), (700, 900, -1))),
]
gv = GenomeViz(fig_track_height=0.7, feature_track_ratio=0.5, track_align_type="center")
gv.set_scale_bar()
for genome in genome_list:
name, size, features = genome["name"], genome["size"], genome["features"]
track = gv.add_feature_track(name, size)
track.add_subtrack(ylim=(0, 100)) # Add subtrack
for idx, feature in enumerate(features, 1):
start, end, strand = feature
track.add_feature(start, end, strand, plotstyle="bigarrow", lw=1)
fig = gv.plotfig()
# Plot user-defined graph to subtrack axes
for track in gv.feature_tracks:
subtrack = track.get_subtrack()
for segment in track.segments:
# Plot y = (50 - 100) random values to subtrack
# Tranform segment-level x coordinate to track-level coordinate
x = np.arange(segment.start, segment.end, 10)
x = segment.transform_coord(x)
y = np.random.randint(50, 100, len(x))
subtrack.ax.fill_between(x, y, color="grey")
# fig.savefig("result.png")
In [3]:
Copied!
from pygenomeviz import GenomeViz
from pygenomeviz.parser import Genbank
from pygenomeviz.utils import load_example_genbank_dataset, is_pseudo_feature
gbk_files = load_example_genbank_dataset("mycoplasma_mycoides")
gbk_list = list(map(Genbank, gbk_files))
gv = GenomeViz(fig_track_height=0.5, feature_track_ratio=0.7)
gv.set_scale_bar()
# Plot CDS, rRNA features for each contig to tracks
for gbk in gbk_list:
track = gv.add_feature_track(gbk.name, gbk.get_seqid2size(), align_label=False)
# Add two subtracks
track.add_subtrack(name="GCcontent", ylim=(0, 100))
track.add_subtrack(name="GCskew", ylim=(-1, 1))
for seqid, features in gbk.get_seqid2features(None).items():
segment = track.get_segment(seqid)
for feature in features:
if feature.type == "CDS":
color = "grey" if is_pseudo_feature(feature) else "blue"
segment.add_features(feature, fc=color)
elif feature.type == "rRNA":
segment.add_features(feature, fc="lime")
fig = gv.plotfig()
# Plot GC content & GC skew graph to subtrack axes
for track, gbk in zip(gv.feature_tracks, gbk_list):
gc_content_subtrack = track.get_subtrack("GCcontent")
gc_skew_subtrack = track.get_subtrack("GCskew")
for segment in track.segments:
seq = gbk.get_seqid2seq()[segment.name]
# Plot GC content
x, gc_content = gbk.calc_gc_content(window_size=1000, step_size=500, seq=seq)
x = segment.transform_coord(x)
gc_content_subtrack.ax.fill_between(x, gc_content, color="grey")
# Plot GC skew
x, gc_skew = gbk.calc_gc_skew(window_size=1000, step_size=500, seq=seq)
x = segment.transform_coord(x)
gc_skew_subtrack.ax.fill_between(x, gc_skew, color="pink")
# fig.savefig("result.png")
from pygenomeviz import GenomeViz
from pygenomeviz.parser import Genbank
from pygenomeviz.utils import load_example_genbank_dataset, is_pseudo_feature
gbk_files = load_example_genbank_dataset("mycoplasma_mycoides")
gbk_list = list(map(Genbank, gbk_files))
gv = GenomeViz(fig_track_height=0.5, feature_track_ratio=0.7)
gv.set_scale_bar()
# Plot CDS, rRNA features for each contig to tracks
for gbk in gbk_list:
track = gv.add_feature_track(gbk.name, gbk.get_seqid2size(), align_label=False)
# Add two subtracks
track.add_subtrack(name="GCcontent", ylim=(0, 100))
track.add_subtrack(name="GCskew", ylim=(-1, 1))
for seqid, features in gbk.get_seqid2features(None).items():
segment = track.get_segment(seqid)
for feature in features:
if feature.type == "CDS":
color = "grey" if is_pseudo_feature(feature) else "blue"
segment.add_features(feature, fc=color)
elif feature.type == "rRNA":
segment.add_features(feature, fc="lime")
fig = gv.plotfig()
# Plot GC content & GC skew graph to subtrack axes
for track, gbk in zip(gv.feature_tracks, gbk_list):
gc_content_subtrack = track.get_subtrack("GCcontent")
gc_skew_subtrack = track.get_subtrack("GCskew")
for segment in track.segments:
seq = gbk.get_seqid2seq()[segment.name]
# Plot GC content
x, gc_content = gbk.calc_gc_content(window_size=1000, step_size=500, seq=seq)
x = segment.transform_coord(x)
gc_content_subtrack.ax.fill_between(x, gc_content, color="grey")
# Plot GC skew
x, gc_skew = gbk.calc_gc_skew(window_size=1000, step_size=500, seq=seq)
x = segment.transform_coord(x)
gc_skew_subtrack.ax.fill_between(x, gc_skew, color="pink")
# fig.savefig("result.png")
Annotation¶
Users can plot user-defined annotations to matplotlib Axes in FeatureTrack (Option for users familiar with matplotlib).
In [4]:
Copied!
from pygenomeviz import GenomeViz
from pygenomeviz.parser import Genbank
from pygenomeviz.utils import load_example_genbank_dataset
gbk_files = load_example_genbank_dataset("yersinia_phage")
gbk_list = list(map(Genbank, gbk_files))
gv = GenomeViz(fig_track_height=0.7, feature_track_ratio=0.5, track_align_type="center")
gv.set_scale_bar()
# Plot CDS features
for gbk in gbk_list:
track = gv.add_feature_track(gbk.name, gbk.get_seqid2size())
for seqid, features in gbk.get_seqid2features("CDS").items():
segment = track.get_segment(seqid)
segment.add_features(features, fc="limegreen", lw=0.5)
fig = gv.plotfig()
# Plot annotations on top track
top_track = gv.feature_tracks[0]
annotate_regions = ((0, 3000), (8000, 12000), (19000, 23000))
for idx, annotate_region in enumerate(annotate_regions, 1):
start, end = annotate_region
start, end = top_track.transform_coord(start), top_track.transform_coord(end)
# Plot horizontal line (track ylim=(-1.0, 1.0))
top_track.ax.hlines(y=1.3, xmin=start, xmax=end, color="red", lw=1, clip_on=False)
# Plot text
text_x, text_y = (start + end) / 2, 1.5
top_track.ax.text(text_x, text_y, s=f"region{idx}", size=15, va="bottom", ha="center")
# Plot fill box
x, y = (start, end, end, start), (-1, -1, 1, 1)
top_track.ax.fill(x, y, fc="skyblue", alpha=0.5, zorder=-1)
# fig.savefig("result.png")
from pygenomeviz import GenomeViz
from pygenomeviz.parser import Genbank
from pygenomeviz.utils import load_example_genbank_dataset
gbk_files = load_example_genbank_dataset("yersinia_phage")
gbk_list = list(map(Genbank, gbk_files))
gv = GenomeViz(fig_track_height=0.7, feature_track_ratio=0.5, track_align_type="center")
gv.set_scale_bar()
# Plot CDS features
for gbk in gbk_list:
track = gv.add_feature_track(gbk.name, gbk.get_seqid2size())
for seqid, features in gbk.get_seqid2features("CDS").items():
segment = track.get_segment(seqid)
segment.add_features(features, fc="limegreen", lw=0.5)
fig = gv.plotfig()
# Plot annotations on top track
top_track = gv.feature_tracks[0]
annotate_regions = ((0, 3000), (8000, 12000), (19000, 23000))
for idx, annotate_region in enumerate(annotate_regions, 1):
start, end = annotate_region
start, end = top_track.transform_coord(start), top_track.transform_coord(end)
# Plot horizontal line (track ylim=(-1.0, 1.0))
top_track.ax.hlines(y=1.3, xmin=start, xmax=end, color="red", lw=1, clip_on=False)
# Plot text
text_x, text_y = (start + end) / 2, 1.5
top_track.ax.text(text_x, text_y, s=f"region{idx}", size=15, va="bottom", ha="center")
# Plot fill box
x, y = (start, end, end, start), (-1, -1, 1, 1)
top_track.ax.fill(x, y, fc="skyblue", alpha=0.5, zorder=-1)
# fig.savefig("result.png")
Legend¶
Example of manual legend plotting code using Figure.legend()
method.
See Legend guide for more details.
In [5]:
Copied!
from pygenomeviz import GenomeViz
from pygenomeviz.parser import Gff
from pygenomeviz.utils import load_example_gff_file
from matplotlib.lines import Line2D
gff_file = load_example_gff_file("escherichia_coli.gff.gz")
gff = Gff(gff_file)
gv = GenomeViz(fig_track_height=0.5)
gv.set_scale_xticks(start=50000)
track = gv.add_feature_track(name="E.coli", segments=(50000, 80000))
track.add_sublabel()
segment = track.get_segment()
for feature in gff.extract_features("CDS", target_range=segment.range):
# Get gene name in GFF attributes column (e.g. `gene=araD;`)
gene_name = str(feature.qualifiers.get("gene", [""])[0])
# Set user-defined feature color based on gene name
if gene_name.startswith("ara"):
color = "blue"
elif gene_name.startswith("thi"):
color = "lime"
elif gene_name in ("pdxA", "surA", "lptD", "djlA", "yabP", "yabQ"):
color = "tomato"
else:
color = "grey"
segment.add_features(feature, plotstyle="bigarrow", color=color, label_type="gene")
fig = gv.plotfig()
# Plot legend for groups
_ = fig.legend(
handles=[
Line2D([], [], marker=">", color="tomato", label="Group1", ms=12, ls="none"),
Line2D([], [], marker=">", color="blue", label="Group2", ms=12, ls="none"),
Line2D([], [], marker=">", color="lime", label="Group3", ms=12, ls="none"),
Line2D([], [], marker=">", color="grey", label="Others", ms=12, ls="none"),
],
fontsize=12,
title="Groups",
title_fontsize=12,
loc="center left",
bbox_to_anchor=(1.02, 0.5),
handlelength=1.0,
)
# fig.savefig("result.png")
from pygenomeviz import GenomeViz
from pygenomeviz.parser import Gff
from pygenomeviz.utils import load_example_gff_file
from matplotlib.lines import Line2D
gff_file = load_example_gff_file("escherichia_coli.gff.gz")
gff = Gff(gff_file)
gv = GenomeViz(fig_track_height=0.5)
gv.set_scale_xticks(start=50000)
track = gv.add_feature_track(name="E.coli", segments=(50000, 80000))
track.add_sublabel()
segment = track.get_segment()
for feature in gff.extract_features("CDS", target_range=segment.range):
# Get gene name in GFF attributes column (e.g. `gene=araD;`)
gene_name = str(feature.qualifiers.get("gene", [""])[0])
# Set user-defined feature color based on gene name
if gene_name.startswith("ara"):
color = "blue"
elif gene_name.startswith("thi"):
color = "lime"
elif gene_name in ("pdxA", "surA", "lptD", "djlA", "yabP", "yabQ"):
color = "tomato"
else:
color = "grey"
segment.add_features(feature, plotstyle="bigarrow", color=color, label_type="gene")
fig = gv.plotfig()
# Plot legend for groups
_ = fig.legend(
handles=[
Line2D([], [], marker=">", color="tomato", label="Group1", ms=12, ls="none"),
Line2D([], [], marker=">", color="blue", label="Group2", ms=12, ls="none"),
Line2D([], [], marker=">", color="lime", label="Group3", ms=12, ls="none"),
Line2D([], [], marker=">", color="grey", label="Others", ms=12, ls="none"),
],
fontsize=12,
title="Groups",
title_fontsize=12,
loc="center left",
bbox_to_anchor=(1.02, 0.5),
handlelength=1.0,
)
# fig.savefig("result.png")
In [6]:
Copied!
from pygenomeviz import GenomeViz
from pygenomeviz.parser import Genbank
from pygenomeviz.utils import load_example_genbank_dataset, is_pseudo_feature
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
gbk_files = load_example_genbank_dataset("mycoplasma_mycoides")
gbk_list = list(map(Genbank, gbk_files))
gv = GenomeViz(fig_track_height=0.5, feature_track_ratio=0.7)
gv.set_scale_bar()
# Plot CDS, rRNA features for each contig to tracks
for gbk in gbk_list:
track = gv.add_feature_track(gbk.name, gbk.get_seqid2size(), align_label=False)
# Add two subtracks
track.add_subtrack(name="GCcontent", ylim=(0, 100))
track.add_subtrack(name="GCskew", ylim=(-1, 1))
for seqid, features in gbk.get_seqid2features(None).items():
segment = track.get_segment(seqid)
for feature in features:
if feature.type == "CDS":
color = "grey" if is_pseudo_feature(feature) else "blue"
segment.add_features(feature, fc=color)
elif feature.type == "rRNA":
segment.add_features(feature, fc="lime")
fig = gv.plotfig()
# Plot GC content & GC skew graph to subtrack axes
for track, gbk in zip(gv.feature_tracks, gbk_list):
gc_content_subtrack = track.get_subtrack("GCcontent")
gc_skew_subtrack = track.get_subtrack("GCskew")
for segment in track.segments:
seq = gbk.get_seqid2seq()[segment.name]
# Plot GCcontent
x, gc_content = gbk.calc_gc_content(window_size=1000, step_size=500, seq=seq)
x = segment.transform_coord(x)
gc_content_subtrack.ax.fill_between(x, gc_content, color="grey")
# Plot GCskew
x, gc_skew = gbk.calc_gc_skew(window_size=1000, step_size=500, seq=seq)
x = segment.transform_coord(x)
gc_skew_subtrack.ax.fill_between(x, gc_skew, color="pink")
# Plot legend for feature types
_ = fig.legend(
handles=[
Line2D([], [], marker=">", color="blue", label="CDS", ms=12, ls="none"),
Line2D([], [], marker=">", color="grey", label="Pseudogene", ms=12, ls="none"),
Line2D([], [], marker=">", color="lime", label="rRNA", ms=12, ls="none"),
],
fontsize=12,
title="Feature Types",
title_fontsize=12,
bbox_to_anchor=(0.9, 1.0),
loc="upper left",
handlelength=1.0,
)
# Plot legend for subtrack graphs
_ = fig.legend(
handles=[
Patch(color="grey", label="GC content"),
Patch(color="pink", label="GC skew"),
],
fontsize=12,
title="Graphs",
title_fontsize=12,
bbox_to_anchor=(0.9, 0.8),
loc="upper left",
handlelength=1.0,
)
# fig.savefig("result.png")
from pygenomeviz import GenomeViz
from pygenomeviz.parser import Genbank
from pygenomeviz.utils import load_example_genbank_dataset, is_pseudo_feature
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
gbk_files = load_example_genbank_dataset("mycoplasma_mycoides")
gbk_list = list(map(Genbank, gbk_files))
gv = GenomeViz(fig_track_height=0.5, feature_track_ratio=0.7)
gv.set_scale_bar()
# Plot CDS, rRNA features for each contig to tracks
for gbk in gbk_list:
track = gv.add_feature_track(gbk.name, gbk.get_seqid2size(), align_label=False)
# Add two subtracks
track.add_subtrack(name="GCcontent", ylim=(0, 100))
track.add_subtrack(name="GCskew", ylim=(-1, 1))
for seqid, features in gbk.get_seqid2features(None).items():
segment = track.get_segment(seqid)
for feature in features:
if feature.type == "CDS":
color = "grey" if is_pseudo_feature(feature) else "blue"
segment.add_features(feature, fc=color)
elif feature.type == "rRNA":
segment.add_features(feature, fc="lime")
fig = gv.plotfig()
# Plot GC content & GC skew graph to subtrack axes
for track, gbk in zip(gv.feature_tracks, gbk_list):
gc_content_subtrack = track.get_subtrack("GCcontent")
gc_skew_subtrack = track.get_subtrack("GCskew")
for segment in track.segments:
seq = gbk.get_seqid2seq()[segment.name]
# Plot GCcontent
x, gc_content = gbk.calc_gc_content(window_size=1000, step_size=500, seq=seq)
x = segment.transform_coord(x)
gc_content_subtrack.ax.fill_between(x, gc_content, color="grey")
# Plot GCskew
x, gc_skew = gbk.calc_gc_skew(window_size=1000, step_size=500, seq=seq)
x = segment.transform_coord(x)
gc_skew_subtrack.ax.fill_between(x, gc_skew, color="pink")
# Plot legend for feature types
_ = fig.legend(
handles=[
Line2D([], [], marker=">", color="blue", label="CDS", ms=12, ls="none"),
Line2D([], [], marker=">", color="grey", label="Pseudogene", ms=12, ls="none"),
Line2D([], [], marker=">", color="lime", label="rRNA", ms=12, ls="none"),
],
fontsize=12,
title="Feature Types",
title_fontsize=12,
bbox_to_anchor=(0.9, 1.0),
loc="upper left",
handlelength=1.0,
)
# Plot legend for subtrack graphs
_ = fig.legend(
handles=[
Patch(color="grey", label="GC content"),
Patch(color="pink", label="GC skew"),
],
fontsize=12,
title="Graphs",
title_fontsize=12,
bbox_to_anchor=(0.9, 0.8),
loc="upper left",
handlelength=1.0,
)
# fig.savefig("result.png")