Source code for pycmplot.plotting.circular

"""
pycmplot.plotting.circular
===========================

Circos-style multi-track circular Manhattan plot.

The module exposes two public functions and one internal per-sector helper:

* :func:`plot_circular` — user-facing entry point.  Configures the
  :class:`pycirclize.Circos` canvas, computes track radii, iterates over
  sectors and tracks, renders gene/SNP annotations, and saves the figure.
* :func:`compute_track_radii_dict` — divides the radial space between
  *r_min* and *r_max* into *n_tracks* evenly-spaced, padded bands and
  returns their ``(r_start, r_end)`` limits.
* :func:`plot_circosm` — internal per-sector renderer called once per
  ``(sector, sumstat)`` pair inside the main loop of :func:`plot_circular`.
  Mutates the :class:`pycirclize.Sector` object in place and returns
  ``None``.
"""

from __future__ import annotations

import logging
import math
from typing import Optional

import numpy as np
import pandas as pd

from pycmplot.io import get_output_paths
from pycmplot.stats import get_highlight_snps
from pycmplot.annotation import get_annotation_column

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Track radius calculator
# ---------------------------------------------------------------------------

def compute_track_radii_dict(
    n_tracks: int,
    r_min: float = 20,
    r_max: float = 100,
    pad: float = 1,
    annotate: bool = False,
) -> dict[str, tuple[float, float]]:
    """Compute ``(r_start, r_end)`` tuples for *n_tracks* evenly-spaced radial bands.

    Divides the usable radial space between *r_min* and *r_max* into
    *n_tracks* bands of equal height, separated by gaps of *pad* units.
    The tracks are ordered from innermost (``'track_1'``) to outermost
    (``'track_n'``).

    Parameters
    ----------
    n_tracks : int
        Number of data tracks to accommodate.
    r_min : float, optional
        Inner boundary of the full plotting area.  Default ``20``.
    r_max : float, optional
        Outer boundary of the full plotting area.  Default ``100``.
    pad : float, optional
        Gap, in the same radius units, between consecutive tracks.
        Default ``1``.
    annotate : bool, optional
        If ``True``, one extra slot is reserved by incrementing *n_tracks*
        before the band height is computed.  Because bands are generated
        inside-out, the extra slot is the outermost entry.  Default
        ``False``.

    Returns
    -------
    dict
        Mapping of ``'track_i' -> (r_start, r_end)`` for ``i`` in
        ``1 ... n_tracks`` (plus one extra entry when *annotate* is
        ``True``), in insertion order from innermost to outermost.

    Raises
    ------
    ValueError
        If the number of bands (after adding the annotation slot) is less
        than one, or if the total padding ``pad * (n_bands - 1)`` leaves no
        radial space for the bands themselves.

    Examples
    --------
    >>> compute_track_radii_dict(n_tracks=2, r_min=0, r_max=10, pad=2)
    {'track_1': (0.0, 4.0), 'track_2': (6.0, 10.0)}
    """
    if annotate:
        n_tracks += 1

    # Without this guard a zero count divides by zero below and a negative
    # count silently yields an empty mapping.
    if n_tracks < 1:
        raise ValueError(
            f"At least one track is required, got n_tracks={n_tracks}."
        )

    total_space = r_max - r_min
    usable_space = total_space - pad * (n_tracks - 1)
    if usable_space <= 0:
        raise ValueError(
            f"Padding ({pad}) is too large for {n_tracks} tracks in "
            f"radius range [{r_min}, {r_max}]."
        )

    track_height = usable_space / n_tracks

    radii: dict[str, tuple[float, float]] = {}
    current = float(r_min)
    for i in range(n_tracks):
        radii[f"track_{i + 1}"] = (current, current + track_height)
        current += track_height + pad
    return radii


# ---------------------------------------------------------------------------
# Per-chromosome circular Manhattan track
# ---------------------------------------------------------------------------

def plot_circosm(
    sector=None,
    sector_radius=None,
    assoc: Optional[pd.DataFrame] = None,
    assoc_by_chr: Optional[dict] = None,
    sector_sizes: Optional[dict] = None,
    chrom_label_loc: Optional[float] = -3,
    chrom_label_size: float = 6,
    track_label_size: float = 6,
    track_label_orientation: Optional[str] = "vertical",
    track_index: int = 0,
    assoc_label: Optional[str] = None,
    logp: bool = True,
    signif_line: Optional[float] = 5e-8,
    signif_threshold: Optional[float] = 5e-8,
    suggest_line: Optional[float] = 1e-5,
    suggest_threshold: Optional[float] = 1e-5,
    highlight: bool = False,
    highlight_color: str = "brown",
    colors: Optional[list[str]] = None,
    point_size: float = 6,
    no_track_labels: bool = False,
) -> None:
    """Plot one track of summary statistics onto a single pycirclize sector.

    Low-level helper called once for every ``(sector, sumstat)`` combination
    by :func:`plot_circular`.  It adds tracks to *sector* in place: a label
    track on the last sector, a y-axis tick track on the first sector, and a
    scatter track (with optional threshold lines) on every sector whose name
    occurs in ``assoc["CHR"]``.  Returns ``None``.

    Parameters
    ----------
    sector : pycirclize.Sector
        Sector object representing one chromosome arc (or the spacer).
    sector_radius : tuple of (float, float)
        ``(r_start, r_end)`` radial limits for this track within *sector*,
        as returned by :func:`compute_track_radii_dict`.
    assoc : pandas.DataFrame
        Full summary statistics (all chromosomes).  Must have a ``CHR``
        column and the y-value column (``logP`` when *logp* is ``True``,
        otherwise ``P``).  Used for the global y-axis limits and to decide
        whether *sector* carries data.
    assoc_by_chr : dict, optional
        Mapping of chromosome name to that chromosome's rows of *assoc*
        (columns ``POS``, the y-value column, and ``in_locus`` when
        *highlight* is ``True``).  When ``None``, the rows are selected from
        *assoc* by ``CHR == sector.name``.
    sector_sizes : dict
        Ordered mapping whose keys are the sector names.  The first key
        receives the y-axis ticks and the last key receives the track label.
    chrom_label_loc : float
        Radial position of the chromosome label.  Values greater than
        ``100`` additionally prefix the label with ``"chr"``.
    chrom_label_size : float, optional
        Font size for chromosome labels.  Default ``6``.
    track_label_size : float, optional
        Font size for the track label on the last sector.  Default ``6``.
    track_label_orientation : {'vertical', 'horizontal'}, optional
        Orientation of the track label text.  Default ``'vertical'``.
    track_index : int, optional
        0-based index of the current sumstat track.  Chromosome labels are
        drawn only when ``track_index == 0`` or the sector is named ``"X"``.
        Default ``0``.
    assoc_label : str, optional
        Track label text rendered on the last sector.
    logp : bool, optional
        If ``True``, plot the ``logP`` column and add two units of headroom
        above the largest value; otherwise plot ``P``.  Default ``True``.
    signif_line : float or bool, optional
        Truthy to draw the orange-red dashed line; the line is drawn at
        *signif_threshold*.  Default ``5e-8``.
    signif_threshold : float, optional
        Y-value of the genome-wide significance line.  Default ``5e-8``.
    suggest_line : float or bool, optional
        Truthy to draw the navy dashed line; the line is drawn at
        *suggest_threshold*.  Default ``1e-5``.
    suggest_threshold : float, optional
        Y-value of the suggestive line.  Default ``1e-5``.
    highlight : bool, optional
        If ``True``, rows with a truthy ``in_locus`` column are drawn in
        *highlight_color* on top of the remaining rows.  Default ``False``.
    highlight_color : str, optional
        Colour of highlighted points.  Default ``'brown'``.
    colors : list of str, optional
        Two colours; the first is used for even chromosome numbers and the
        second for odd ones.  Default ``['steelblue', 'orange']``.
    point_size : float, optional
        Marker size passed to the scatter calls.  Default ``6``.
    no_track_labels : bool, optional
        Suppress the track label on the last sector.  Default ``False``.
    """
    if colors is None:
        colors = ["steelblue", "orange"]
    y_col = "logP" if logp else "P"

    # ------------------------------------------------------------------
    # Global y-axis limits
    # ------------------------------------------------------------------
    # Only finite values define the axis: math.floor/math.ceil raise on
    # NaN and inf, so a single missing value would otherwise abort the plot.
    y_all = assoc[y_col].to_numpy(dtype=float)
    y_finite = y_all[np.isfinite(y_all)]
    if y_finite.size:
        v_min = float(math.floor(y_finite.min()))
        v_max = float(math.ceil(y_finite.max()))
    else:
        v_min = v_max = 0.0
    if logp:
        v_max += 2  # headroom above the strongest signal

    sector_keys = list(sector_sizes)

    # ------------------------------------------------------------------
    # Track label on the last (spacer) sector
    # ------------------------------------------------------------------
    if sector.name == sector_keys[-1]:
        lbl_track = sector.add_track(sector_radius)
        lbl_track.axis(fc="white", alpha=0)
        if not no_track_labels:
            lbl_track.text(
                assoc_label,
                x=(sector.end - sector.start) / 6,
                adjust_rotation=True,
                orientation=track_label_orientation,
                size=float(track_label_size),
                color="black",
                fontstyle="normal",
                fontweight="regular",
                multialignment="left",
            )

    # Sectors without data for this sumstat get nothing else drawn.
    if sector.name not in set(assoc["CHR"].unique()):
        return

    # ------------------------------------------------------------------
    # Chromosome label (first track only, or chrX)
    # ------------------------------------------------------------------
    if track_index == 0 or sector.name == "X":
        if chrom_label_loc > 100:
            chr_label = str("chr") + str(sector.name.replace("23", "X"))
        else:
            chr_label = sector.name.replace("23", "X")
        sector.text(
            chr_label,
            r=chrom_label_loc,
            size=chrom_label_size,
        )
        sector.axis(fc="none", lw=0, ec="none", alpha=0.5)

    # ------------------------------------------------------------------
    # Y-axis ticks on the first chromosome
    # ------------------------------------------------------------------
    if sector.name == sector_keys[0]:
        yax_track = sector.add_track(sector_radius)
        yax_track.axis(fc="white", alpha=0.08)
        if logp:
            # Widen the step until between 2 and 5 ticks remain.  The +2
            # headroom guarantees a span of at least 2, so this terminates.
            tick_step = 1
            yticks = []
            while len(yticks) < 2 or len(yticks) > 5:
                yticks = np.arange(v_min, v_max, tick_step)
                tick_step += 1
        else:
            yticks = np.arange(v_min, v_max)
        yax_track.yticks(
            yticks,
            labels=[str(int(t)) for t in yticks],
            side="left",
            vmin=v_min,
            vmax=v_max,
            label_size=5,
        )

    # ------------------------------------------------------------------
    # Data track
    # ------------------------------------------------------------------
    if assoc_by_chr is None:
        assoc_chr = assoc.loc[assoc["CHR"] == sector.name]
    else:
        assoc_chr = assoc_by_chr.get(sector.name)
    if assoc_chr is None:
        return

    # Non-finite y-values lie outside [v_min, v_max] and cannot be placed
    # on the track, so they are left out of the scatter.
    finite_rows = np.isfinite(assoc_chr[y_col].to_numpy(dtype=float))
    if not finite_rows.all():
        assoc_chr = assoc_chr[finite_rows]

    track = sector.add_track(sector_radius, r_pad_ratio=0.05)
    track.axis(fc="lightgrey", alpha=0.08)

    chrom_num = sector.name.replace("X", "23").replace("Y", "24")
    try:
        chrom_parity = int(chrom_num) % 2
    except ValueError:
        # Non-numeric names (e.g. "MT") alternate by sector position.
        chrom_parity = (sector_keys.index(sector.name) + 1) % 2
    color = colors[0] if chrom_parity == 0 else colors[1]

    if highlight:
        sig = assoc_chr[assoc_chr["in_locus"]]
        bg = assoc_chr[~assoc_chr["in_locus"]]
        track.scatter(
            data=bg,
            x=list(bg["POS"]),
            y=list(bg[y_col]),
            vmin=v_min,
            vmax=v_max,
            marker="o",
            s=point_size,
            color=color,
            alpha=1,
        )
        if not sig.empty:
            track.scatter(
                list(sig["POS"]),
                list(sig[y_col]),
                vmin=v_min,
                vmax=v_max,
                s=point_size,
                marker="o",
                color=highlight_color,
            )
    else:
        track.scatter(
            data=assoc_chr,
            x=list(assoc_chr["POS"]),
            y=list(assoc_chr[y_col]),
            vmin=v_min,
            vmax=v_max,
            marker="o",
            s=point_size,
            color=color,
            alpha=1,
        )

    # ------------------------------------------------------------------
    # Significance lines
    # ------------------------------------------------------------------
    if signif_line:
        track.line(
            x=[sector.start, sector.end],
            y=[signif_threshold, signif_threshold],
            vmin=v_min,
            vmax=v_max,
            color="orangered",
            linestyle="--",
        )
    if suggest_line:
        track.line(
            x=[sector.start, sector.end],
            y=[suggest_threshold, suggest_threshold],
            vmin=v_min,
            vmax=v_max,
            color="navy",
            linestyle="--",
        )
def plot_circular(
    sumstats_loaded: dict,
    sector_sizes: Optional[dict] = None,
    signif_lines: Optional[list] = None,
    logp: bool = False,
    pad: float = 1,
    r_min: float = 20,
    r_max: float = 100,
    annotate: Optional[str] = None,
    label_col: Optional[str] = None,
    chrom_label_side: str = "inside",
    chrom_label_size: float = 6,
    signif_line: float = 5e-8,
    highlight: bool = False,
    highlight_thresh: float = 5e-8,
    highlight_color: str = "brown",
    highlight_line: bool = False,
    highlight_line_color: str = "grey",
    colors: Optional[list[str]] = None,
    point_size: float = 6,
    track_label_size: float = 6,
    track_label_orientation: str = "vertical",
    hits_table: Optional[pd.DataFrame] = None,
    annotation_size: float = 6,
    plot_title: Optional[str] = None,
    plot_title_size: float = 12,
    dpi: Optional[int] = None,
    output_format: Optional[str] = "png",
    output_dir: Optional[str] = ".",
    ylabel: Optional[str] = None,
    no_track_labels: bool = False,
):
    """Generate a multi-track Circos-style circular Manhattan plot.

    Builds a :class:`pycirclize.Circos` canvas from *sector_sizes*, computes
    radial track extents with :func:`compute_track_radii_dict`, and calls
    :func:`plot_circosm` once per ``(sector, sumstat)`` pair.  Afterwards,
    labels from *hits_table* are added on an annotation ring (when
    *annotate* is truthy), a shared y-axis label is written on the last
    sector, and the figure is saved.

    Parameters
    ----------
    sumstats_loaded : dict
        Mapping of ``label -> sequence`` whose first element is the summary
        statistics DataFrame (columns ``CHR``, ``POS`` and ``logP`` or
        ``P``).  One radial track is created per key; the first key gets the
        outermost data track.
    sector_sizes : dict
        Ordered mapping of sector name to size, passed to
        :class:`pycirclize.Circos`.  The last key is treated as the spacer
        sector that carries the track and y-axis labels.
    signif_lines : list of dict, optional
        One ``{'genome': float, 'suggestive': float}`` dict per track, in
        the same order as *sumstats_loaded*.  When empty or ``None``, the
        same pair is built for every track from *signif_line* and ``1e-5``.
    logp : bool, optional
        Plot the ``logP`` column instead of ``P``.  Default ``False``.
    pad : float, optional
        Gap in radius units between consecutive tracks.  Default ``1``.
    r_min : float, optional
        Inner radius of the innermost track.  Default ``20``.
    r_max : float, optional
        Outer radius of the outermost track.  Default ``100``.
    annotate : str, optional
        Annotation mode forwarded to
        :func:`~pycmplot.annotation.get_annotation_column`.  Any truthy
        value reserves the outermost ring for annotations; ``None`` or
        another falsy value disables them.  Default ``None``.
    label_col : str, optional
        Preferred label column in *hits_table*; resolved through
        :func:`~pycmplot.annotation.get_annotation_column`.  Labels are
        rendered in regular style when the resolved column is ``'SNP'`` and
        in italics otherwise.
    chrom_label_side : {'inside', 'outside'}, optional
        ``'outside'`` places chromosome labels beyond the outermost data
        track; any other value places them at ``r_min - 3``.  Default
        ``'inside'``.
    chrom_label_size : float, optional
        Chromosome label font size.  Default ``6``.
    signif_line : float, optional
        Genome-wide threshold used when *signif_lines* is not given.  Values
        below ``1`` are converted to ``-log10``.  A falsy value disables
        both threshold lines.  Default ``5e-8``.
    highlight : bool, optional
        Render rows flagged ``in_locus`` in *highlight_color*.  Default
        ``False``.
    highlight_thresh : float, optional
        Accepted for API compatibility; not used inside this function.
        Default ``5e-8``.
    highlight_color : str, optional
        Colour of highlighted points.  Default ``'brown'``.
    highlight_line : bool, optional
        Draw a dashed radial line from the innermost track of the sector to
        the annotation ring for each annotated position.  Default ``False``.
    highlight_line_color : str, optional
        Colour of that radial line; falls back to ``'grey'`` when falsy.
    colors : list of str, optional
        Two alternating chromosome colours.  Default
        ``['steelblue', 'silver']``.
    point_size : float, optional
        Scatter marker size.  Default ``6``.
    track_label_size : float, optional
        Font size of the track labels and of the shared y-axis label.
        Default ``6``.
    track_label_orientation : {'vertical', 'horizontal'}, optional
        Track label text orientation.  Default ``'vertical'``.
    hits_table : pandas.DataFrame, optional
        Table with ``CHR``, ``POS`` and the label column.  Annotations are
        drawn only when *annotate* is truthy and this table is non-empty.
    annotation_size : float, optional
        Font size for annotation labels.  Default ``6``.
    plot_title : str, optional
        Text placed in the centre of the circle; also forwarded to
        :func:`~pycmplot.io.get_output_paths`.
    plot_title_size : float, optional
        Font size for the centre title.  Default ``12``.
    dpi : int, optional
        Resolution passed to ``Figure.savefig``.  Default ``None``.
    output_format : str, optional
        Image format forwarded to :func:`~pycmplot.io.get_output_paths`.
        Default ``'png'``.
    output_dir : str, optional
        Output directory forwarded to
        :func:`~pycmplot.io.get_output_paths`.  Default ``'.'``.
    ylabel : str, optional
        Override for the shared y-axis label.  When ``None``, the label is
        ``"-log₁₀(P)"`` if *logp* is ``True`` and ``"P"`` otherwise.
    no_track_labels : bool, optional
        Suppress track labels on the spacer sector.  Default ``False``.

    Returns
    -------
    matplotlib.figure.Figure
        The figure produced by ``Circos.plotfig()``.  It is also saved when
        :func:`~pycmplot.io.get_output_paths` yields a plot path; the file
        name (not the directory) is lower-cased.

    See Also
    --------
    compute_track_radii_dict :
        Computes the ``(r_start, r_end)`` limits for each track.
    plot_circosm :
        Renders one track on one sector.
    """
    import os

    from pycirclize import Circos

    if colors is None:
        colors = ["steelblue", "silver"]

    # Output path for the figure (the other two returned paths are unused here).
    labels = list(sumstats_loaded.keys())
    plt_name, _table_out, _plt_base = get_output_paths(
        labels,
        mode="cm",
        logp=logp,
        output_dir=output_dir,
        plot_title=plot_title,
        output_format=output_format,
    )

    circos = Circos(sector_sizes, space=0.8)
    if plot_title:
        circos.text(text=plot_title, size=plot_title_size, weight="normal")

    radii = compute_track_radii_dict(
        n_tracks=len(sumstats_loaded),
        pad=pad,
        r_min=r_min,
        r_max=r_max,
        annotate=bool(annotate),
    )
    # The outermost band; used for annotations when *annotate* is truthy.
    annotation_track_radius = radii[next(reversed(radii))]

    # Reverse so the outermost track is plotted first.
    radii_reversed = dict(reversed(list(radii.items())))

    inside_loc = r_min - 3
    outside_loc = r_max + 4
    if annotate:
        # Move the annotation band to the end so zip() below pairs the
        # sumstats with data bands only.
        annot_key = next(iter(radii_reversed))
        annot_r = radii_reversed.pop(annot_key)
        outside_loc = max(list(radii_reversed.values())[0]) + 2
        radii_reversed["annot_track_r"] = annot_r

    chrom_label_loc = outside_loc if chrom_label_side == "outside" else inside_loc

    if not signif_lines:
        # A falsy signif_line is left untouched so the lines are skipped
        # instead of failing on the comparison.
        if signif_line and signif_line < 1:
            signif_line = -np.log10(signif_line)
        suggest_line = -np.log10(1e-5)
        signif_lines = [
            {"genome": signif_line, "suggestive": suggest_line}
            for _ in sumstats_loaded
        ]

    for index, (sector_radius, sumstats_key, sumstats_value, signif_dict) in enumerate(
        zip(
            radii_reversed.values(),
            sumstats_loaded.keys(),
            sumstats_loaded.values(),
            signif_lines,
        )
    ):
        assoc = sumstats_value[0].copy()
        assoc["POS"] = assoc["POS"].astype(np.int32)
        if logp:
            assoc["logP"] = assoc["logP"].astype(np.float32)
        else:
            assoc["P"] = assoc["P"].astype(np.float32)

        sumstat_name = sumstats_key
        sig_thresh = signif_dict["genome"]
        sug_thresh = signif_dict["suggestive"]

        logger.info("Plotting : %s", sumstat_name)

        # Split once per sumstat so each sector call is a dict lookup.
        assoc_by_chr = {
            chrom: df for chrom, df in assoc.groupby("CHR", sort=False)
        }

        for sector in circos.sectors:
            plot_circosm(
                sector=sector,
                sector_radius=sector_radius,
                sector_sizes=sector_sizes,
                track_index=index,
                chrom_label_loc=chrom_label_loc,
                chrom_label_size=chrom_label_size,
                track_label_size=track_label_size,
                track_label_orientation=track_label_orientation,
                assoc=assoc,
                assoc_by_chr=assoc_by_chr,
                assoc_label=sumstat_name,
                logp=logp,
                signif_line=sig_thresh,
                signif_threshold=sig_thresh,
                suggest_line=bool(signif_line),
                suggest_threshold=sug_thresh,
                highlight=highlight,
                highlight_color=highlight_color,
                colors=colors,
                point_size=point_size,
                no_track_labels=no_track_labels,
            )

    # ------------------------------------------------------------------
    # Circular: gene/SNP annotations
    # ------------------------------------------------------------------
    if annotate and hits_table is not None and not hits_table.empty:
        label_col = get_annotation_column(
            annotate=annotate,
            hits_table=hits_table,
            label_col=label_col,
        )
        fstyle = "normal" if label_col == "SNP" else "italic"
        if not highlight_line_color:
            highlight_line_color = "grey"

        sectors_by_name = {sector.name: sector for sector in circos.sectors}
        r_low = annotation_track_radius[0]

        for _, row in hits_table.iterrows():
            sector = sectors_by_name.get(str(row["CHR"]))
            if sector is None:
                continue

            label = row[label_col]
            pos = row["POS"]

            a_track = sector.add_track(annotation_track_radius)
            a_track.axis(fc="none", lw=0, ec="none", alpha=0)
            a_track.annotate(
                x=pos,
                label=str(label),
                min_r=r_low,
                max_r=r_low + 6,
                label_size=annotation_size,
                text_kws={
                    "size": "large",
                    "color": "black",
                    "alpha": 1,
                    "fontstyle": fstyle,
                    "fontweight": "normal",
                    "multialignment": "left",
                },
            )

            if highlight_line:
                sector_rlim = [t.r_lim for t in sector.tracks]
                sector_min_r = min(sector_rlim)[0]
                sector.line(
                    r=[sector_min_r, r_low],
                    start=pos,
                    end=pos,
                    alpha=0.4,
                    color=highlight_line_color,
                    lw=0.4,
                    ls="--",
                )

    # ------------------------------------------------------------------
    # Circular: single y-axis label on last sector
    # ------------------------------------------------------------------
    last_sector_name = list(sector_sizes.keys())[-1]
    if ylabel is None:
        ylabel_text = "-log\u2081\u2080(P)" if logp else "P"
    else:
        ylabel_text = ylabel

    for sector in circos.sectors:
        if sector.name != last_sector_name:
            continue
        sector_rlim = [t.r_lim for t in sector.tracks]
        sector_min_r = min(sector_rlim)[0]
        sector_max_r = max(sector_rlim)[1]
        sector.text(
            ylabel_text,
            x=sector.end - (sector.end - sector.start) / 5,
            r=(sector_min_r + sector_max_r) / 2 + (sector_min_r + sector_max_r) / 12,
            adjust_rotation=False,
            ignore_range_error=True,
            size=float(track_label_size),
            color="black",
            fontstyle="italic",
            fontweight="regular",
            rotation=92,
            rotation_mode="default",
            va="top",
            ha="right",
        )

    fig = circos.plotfig()
    if plt_name:
        # Lower-case only the file name: lower-casing the whole path would
        # redirect the output to a different (possibly missing) directory
        # on case-sensitive filesystems.
        out_dir, out_file = os.path.split(plt_name)
        out_path = os.path.join(out_dir, out_file.lower())
        fig.savefig(fname=out_path, dpi=dpi)
        logger.info("Saved circular Manhattan plot: %s", out_path)
    return fig