Comparative Genomics
In [1]:
Copied!
# Optional setup: uncomment to install the Python packages and the external alignment tools.
# !pip install pycirclize pygenomeviz
# !apt install ncbi-blast+ mummer mmseqs2
# !pip install pycirclize pygenomeviz
# !apt install ncbi-blast+ mummer mmseqs2
Advanced users can flexibly plot comparative genomics figures with the pyCirclize API. This notebook shows simple code recipes for comparative genomics Circos visualization using the pyGenomeViz align module.
MUMmer¶
Plot MUMmer alignment links between the query and reference genomes.
In [2]:
Copied!
from pycirclize import Circos
from pygenomeviz.parser import Genbank
from pygenomeviz.utils import load_example_genbank_dataset
from pygenomeviz.align import MUMmer
# Tick spacing (bp); sectors shorter than this get no ticks.
TICKS_INTERVAL = 1_000_000

# Load the reference and query genomes from the example dataset
gbk_files = load_example_genbank_dataset("escherichia_coli")
ref_gbk = Genbank(gbk_files[2])
query_gbk = Genbank(gbk_files[3])

# Look up the seqid -> size mappings once; they feed both the sector layout
# and the per-sector direction map below.
ref_seqid2size = ref_gbk.get_seqid2size()
query_seqid2size = query_gbk.get_seqid2size()
# Query sectors are registered in reversed order after the reference sectors
# and flagged as non-clockwise.
query_seqid2size_reversed = dict(reversed(list(query_seqid2size.items())))

# Build the Circos instance
circos = Circos(
    sectors=dict(**ref_seqid2size, **query_seqid2size_reversed),
    start=-358,
    end=2,
    space=4,
    sector2clockwise=dict.fromkeys(query_seqid2size, False),
)

# Genome name and total length labels
ref_label = f"{ref_gbk.name}\n({ref_gbk.full_genome_length:,} bp)"
query_label = f"{query_gbk.name}\n({query_gbk.full_genome_length:,} bp)"
circos.text(ref_label, r=130, deg=35, size=13)
circos.text(query_label, r=130, deg=-35, size=13)

# Draw a thin black axis on every sector; add Mb ticks only where the sector
# is at least one tick interval long.
for sector in circos.sectors:
    track = sector.add_track((99.8, 100))
    track.axis(fc="black")
    if sector.size < TICKS_INTERVAL:
        continue
    track.xticks_by_interval(
        TICKS_INTERVAL,
        label_formatter=lambda v: f"{v/1000000:.1f} Mb",
        label_orientation="vertical",
    )

# Run the MUMmer comparison and draw one link per alignment:
# red for inverted alignments, grey otherwise.
align_coords = MUMmer([query_gbk, ref_gbk]).run()
for ac in align_coords:
    if ac.is_inverted:
        color = "red"
    else:
        color = "grey"
    circos.link(
        (ac.query_name, ac.query_start, ac.query_end),
        (ac.ref_name, ac.ref_start, ac.ref_end),
        color=color,
        r1=98,
        r2=98,
    )

fig = circos.plotfig()
from pycirclize import Circos
from pygenomeviz.parser import Genbank
from pygenomeviz.utils import load_example_genbank_dataset
from pygenomeviz.align import MUMmer
# Tick spacing (bp); sectors shorter than this get no ticks.
TICKS_INTERVAL = 1_000_000

# Load the reference and query genomes from the example dataset
gbk_files = load_example_genbank_dataset("escherichia_coli")
ref_gbk = Genbank(gbk_files[2])
query_gbk = Genbank(gbk_files[3])

# Look up the seqid -> size mappings once; they feed both the sector layout
# and the per-sector direction map below.
ref_seqid2size = ref_gbk.get_seqid2size()
query_seqid2size = query_gbk.get_seqid2size()
# Query sectors are registered in reversed order after the reference sectors
# and flagged as non-clockwise.
query_seqid2size_reversed = dict(reversed(list(query_seqid2size.items())))

# Build the Circos instance
circos = Circos(
    sectors=dict(**ref_seqid2size, **query_seqid2size_reversed),
    start=-358,
    end=2,
    space=4,
    sector2clockwise=dict.fromkeys(query_seqid2size, False),
)

# Genome name and total length labels
ref_label = f"{ref_gbk.name}\n({ref_gbk.full_genome_length:,} bp)"
query_label = f"{query_gbk.name}\n({query_gbk.full_genome_length:,} bp)"
circos.text(ref_label, r=130, deg=35, size=13)
circos.text(query_label, r=130, deg=-35, size=13)

# Draw a thin black axis on every sector; add Mb ticks only where the sector
# is at least one tick interval long.
for sector in circos.sectors:
    track = sector.add_track((99.8, 100))
    track.axis(fc="black")
    if sector.size < TICKS_INTERVAL:
        continue
    track.xticks_by_interval(
        TICKS_INTERVAL,
        label_formatter=lambda v: f"{v/1000000:.1f} Mb",
        label_orientation="vertical",
    )

# Run the MUMmer comparison and draw one link per alignment:
# red for inverted alignments, grey otherwise.
align_coords = MUMmer([query_gbk, ref_gbk]).run()
for ac in align_coords:
    if ac.is_inverted:
        color = "red"
    else:
        color = "grey"
    circos.link(
        (ac.query_name, ac.query_start, ac.query_end),
        (ac.ref_name, ac.ref_start, ac.ref_end),
        color=color,
        r1=98,
        r2=98,
    )

fig = circos.plotfig()