Source code for pycmplot.plotting.linear

"""
pycmplot.plotting.linear
========================

Multi-track stacked linear Manhattan plot.

The module exposes two public functions:

* :func:`plot_linear` — the user-facing entry point.  Accepts the
  ``sumstats_loaded`` dict produced by
  :func:`~pycmplot.io.get_sumstats_and_merged_sector_list`, resolves
  output paths, and delegates rendering to :func:`plot_linearm`.
* :func:`plot_linearm` — the core rendering engine.  Accepts a list of
  DataFrames and a fully resolved set of plotting parameters, builds the
  matplotlib figure, draws all tracks, and saves the file.

Internal helpers:

* :func:`_draw_annotation_arrows` — places angled
  :class:`~matplotlib.patches.FancyArrowPatch` arrows from spread gene
  labels down to their corresponding signal positions.
* :func:`_draw_annotation_arrows_multirail` — places angled
  :class:`~matplotlib.patches.FancyArrowPatch` arrows from spread gene
  labels down to their corresponding signal positions with multirail capability 
  and single sort + rank-reassignment to avoid arrow crossing.
"""

from __future__ import annotations

import logging
from typing import Optional

import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
from matplotlib.patches import FancyArrowPatch
import matplotlib.transforms as mtransforms
from natsort import natsort_keygen

from pycmplot.constants import CHROM_ORDER
from pycmplot.io import get_output_paths
from pycmplot.annotation import get_annotation_column

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Annotation helpers
# ---------------------------------------------------------------------------
# Using cumulative distance for anntations and separating clusters
def _draw_annotation_arrows(
    ax,
    annot_df,
    chr_col: str,
    label_col: str,
    offsets: dict,
    chr_max: dict,
    spread_width: float = 60e6,
    isolation_threshold: float = 80e6,
    stack_threshold: float = 10e6,
    y_text_base: float = 0.55,
    y_stack_step: float = 0.02,
    y_tip: float = 0.0,
    asize: float = 8,
) -> None:
    """
    Single-rail annotation renderer with tiered label placement,
    chromosome-boundary spreading, cumulative-distance stacking, and
    curved arrows.

    Layout pipeline
    ---------------
    **Chromosome boundary detection**
        For each pair of adjacent chromosomes, the genomic gap between
        the last annotation on chromosome *k* and the first annotation
        on chromosome *k+1* is computed.  If the gap is narrower than
        ``spread_width``, both boundary annotations receive an
        ``x_bound`` value (negative for the left boundary label,
        positive for the right) so that the spreading step knows to
        push them apart.

    **Tiered label x-placement** (per chromosome)
        Each label is assigned an x position according to a two-tier
        rule based on its proximity to neighbours and chromosome
        boundaries:

        *Tier 1 — isolated labels*
            If the nearest neighbour and the nearest chromosome boundary
            are both at least ``isolation_threshold`` bp away, the label
            is placed directly above its signal (``x_text = x_signal``).

        *Tier 2 — dense / boundary labels*
            Labels that are too close to a neighbour or a chromosome
            boundary are spread.  Boundary labels are shifted left or
            right by ``spread_width / 3`` depending on which side of
            the boundary they fall.  All remaining dense labels are
            placed at regular ``spread_width`` intervals starting from
            the leftmost dense signal, with the interval reduced
            iteratively if the chromosome is narrower than
            ``spread_width``.

    **Cumulative-distance stacking** (per chromosome)
        After x positions are assigned, y positions are computed by
        walking labels left-to-right.  If two adjacent labels are
        closer than ``stack_threshold`` in x, the right label is
        stacked above the left by ``y_stack_step`` scaled by how much
        closer than ``stack_threshold`` the pair is.  Labels that are
        far enough apart reset to ``y_text_base``.

    **Arrows**
        A ``FancyArrowPatch`` with ``arc3`` connectionstyle connects
        each label's text position to its signal tip at ``y_tip``.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Target axes on which annotations are drawn.
    annot_df : pd.DataFrame
        Annotation table.  Must contain ``chr_col``, ``"x"``
        (cumulative genomic position in bp), and ``label_col``.
        Rows are processed in chromosome then position order.
    chr_col : str
        Column name for chromosome identifiers.
    label_col : str
        Column name for annotation labels.
    offsets : dict
        Mapping of chromosome name → cumulative start offset (bp).
    chr_max : dict
        Mapping of chromosome name → chromosome length (bp).
    spread_width : float, optional
        Genomic interval (bp) used as the base spacing between dense
        labels and as the threshold for chromosome-boundary detection.
        Default 60e6.
    isolation_threshold : float, optional
        Minimum distance (bp) to both the nearest neighbour and the
        nearest chromosome boundary for a label to be considered
        isolated (Tier 1).  Default 80e6.
    stack_threshold : float, optional
        Maximum x-distance (bp) between adjacent labels below which
        the right label is stacked upward rather than reset to
        ``y_text_base``.  Default 10e6.
    y_text_base : float, optional
        Axes-fraction y-coordinate for the baseline label row.
        Default 0.55.
    y_stack_step : float, optional
        Base axes-fraction increment applied when stacking a label
        above its left neighbour.  The actual increment is scaled by
        ``1 - cum_dist / stack_threshold`` so tighter pairs stack
        higher.  Default 0.02.
    y_tip : float, optional
        Axes-fraction y-coordinate for arrow tips (signal level).
        Default 0.0.
    asize : float, optional
        Font size (pt) for rendered label text.  Default 8.
    """

    from natsort import natsorted

    annot_df = annot_df.sort_values(by=[chr_col, "x"], key=natsort_keygen())
    last_xtext = 0 - spread_width

    # ------------------------------------------------------------------
    # Chromosome boundary detection
    # ------------------------------------------------------------------
    # For each adjacent chromosome pair, mark the boundary annotations
    # with x_bound if the inter-chromosome gap is narrower than
    # spread_width.  x_bound is negative for the left boundary label
    # and positive for the right, encoding both the direction and the
    # magnitude of the gap for use in the spreading step below.
    chr_order = natsorted(set(annot_df[chr_col]))
    annot_df["x_bound"] = np.nan

    for chr1, chr2 in zip(chr_order[:-1], chr_order[1:]):
        df1 = annot_df[annot_df[chr_col] == chr1]
        df2 = annot_df[annot_df[chr_col] == chr2]

        if df1.empty or df2.empty:
            continue

        idx1 = df1["x"].idxmax()   # last annotation in chr1
        idx2 = df2["x"].idxmin()   # first annotation in chr2

        gap = (
            annot_df.loc[idx2, "x"]
            - annot_df.loc[idx1, "x"]
        )

        if gap <= spread_width:
            annot_df.loc[idx1, "x_bound"] = -gap
            annot_df.loc[idx2, "x_bound"] =  gap

    # ------------------------------------------------------------------
    # Per-chromosome label placement and rendering
    # ------------------------------------------------------------------
    for chr_name, df_chr in annot_df.groupby(chr_col, sort=False):
        df_chr    = df_chr.sort_values("x")
        chr_start = offsets[chr_name]
        chr_end   = offsets[chr_name] + chr_max[chr_name]
        chr_range = chr_end - chr_start

        x_signals = df_chr["x"].values
        x_bounds  = df_chr["x_bound"].values
        labels    = df_chr[label_col].values
        n         = len(x_signals)

        # --------------------------------------------------------------
        # Tiered x-position assignment
        # --------------------------------------------------------------
        # Tier 1 (isolated): label sits directly above its signal.
        # Tier 2 (dense / boundary): label is spread away from its
        #   signal.  Boundary labels are shifted by spread_width/3 in
        #   the direction away from the adjacent chromosome.  All other
        #   dense labels receive evenly-spaced positions at spread_width
        #   intervals, shifted further right if they would overlap the
        #   previous chromosome's last label.
        x_texts = []
        sw  = spread_width
        pad = sw / int(str(sw)[:2]) / 2

        for k, (x_sig, x_bnd) in enumerate(zip(x_signals, x_bounds)):
            neighbours         = np.delete(x_signals, k)
            min_neighbor_dist  = (
                np.min(np.abs(neighbours - x_sig))
                if len(neighbours) else np.inf
            )
            min_dist = min(min_neighbor_dist, np.abs(x_bnd))

            if min_dist >= isolation_threshold:
                # Tier 1: isolated — place directly above signal
                x_texts.append(x_sig)
            else:
                if not np.isnan(x_bnd):
                    # Chromosome boundary: shift away from the boundary
                    x_sig = x_sig - sw / 3 if x_bnd < 0 else x_sig + sw / 3
                    x_texts.append(x_sig)
                else:
                    # Tier 2: dense interior — defer to spread pass
                    x_texts.append(None)

        # Spread deferred (Tier 2) labels at regular intervals
        spread_indices = [k for k, v in enumerate(x_texts) if v is None]
        if spread_indices:
            # Reduce spread interval until it fits within the chromosome
            while sw > chr_range and sw > pad:
                sw -= pad

            sig_start = x_signals[spread_indices[0]]
            xmin      = sig_start - sw
            positions = np.arange(
                xmin,
                xmin + len(spread_indices) * sw,
                sw,
            )

            # Shift right if positions overlap the previous chromosome
            while positions[0] <= last_xtext:
                positions = positions + sw

            for j, k in enumerate(spread_indices):
                x_texts[k] = positions[j]

        # --------------------------------------------------------------
        # Cumulative-distance y stacking
        # --------------------------------------------------------------
        # Walk labels left-to-right.  If the x gap to the previous label
        # is below stack_threshold, stack the current label upward by
        # y_stack_step scaled by how tight the pair is.  Otherwise reset
        # to y_text_base.
        y_texts = [y_text_base] * n

        for k in range(1, n):
            cum_dist = abs(x_texts[k] - x_texts[k - 1])
            if cum_dist <= stack_threshold:
                y_texts[k] = (
                    y_texts[k - 1]
                    + y_stack_step
                    + y_stack_step * (1 - cum_dist / stack_threshold)
                )
            else:
                y_texts[k] = y_text_base

        # --------------------------------------------------------------
        # Draw arrows and labels
        # --------------------------------------------------------------
        for x_sig, x_txt, y_txt, label in zip(
            x_signals, x_texts, y_texts, labels
        ):
            # Straight arrows: this function handles sparse annotations
            # only; curvature is intentionally zero.  Dense annotations
            # are handled by _draw_annotation_arrows_multirail which uses
            # arc3 connectionstyle with non-zero rad.
            rad = 0

            arrow = FancyArrowPatch(
                (x_txt, y_txt),
                (x_sig, y_tip - 0.05),
                arrowstyle="-|>",
                mutation_scale=8,
                lw=0.6,
                color="grey",
                alpha=0.5,
                connectionstyle=f"arc3,rad={rad}",
                transform=ax.transData,
            )
            ax.add_patch(arrow)

            ax.text(
                x_txt,
                y_txt + 0.02,
                str(label),
                rotation=90,
                ha="center",
                va="bottom",
                fontsize=asize,
                clip_on=False,
                color="black",
                fontstyle="italic",
                fontweight="regular",
            )

        last_xtext = max(x_texts)

#----- multi-rail annotations
def _draw_annotation_arrows_multirail(
    ax,
    annot_df,
    chr_col: str,
    label_col: str,
    offsets: dict,
    chr_max: dict,
    spread_width: float = 60e6,
    y_text_base: float = 0.25,
    y_stack_step: float = 0.1,
    max_rad: float = 0.35,
    y_tip: float = 0.0,
    fsize: float = 8,
    rail_frac: float = 0.95,
    min_sep: float = 6e6,
    asize: float = 8,
) -> None:
    """
    Dense annotation renderer with relaxation-driven multi-rail
    stacking, linspace rank-reassignment, curved arrows, and adaptive
    ylim.

    Layout pipeline
    ---------------
    1. **Relaxation pass**:
       All labels are sorted by ``x_signal`` and a bidirectional
       relaxation loop enforces ``min_sep`` between every adjacent pair.
       Labels are pushed apart until no two are closer than ``min_sep``.
       The relaxed positions are stored as ``x_relaxed``.

    2. **Rail assignment**:
       Each label's rail is determined by how far its relaxed position
       drifted from its signal::

           drift    = x_relaxed - x_signal
           rail_id  = floor(drift / (rail_width / max_rails))

       Labels that drifted further are assigned to higher rails,
       distributing stacking proportionally to local density.  No
       per-rail queue processing or max_drift threshold is needed.

    3. **linspace rank-reassignment**:
       The layout is sorted by ``x_signal``.  ``x_text`` values are
       replaced with ``np.linspace(rail_start, rail_end, n)`` — evenly
       spaced slots spanning the full rail.  This guarantees:

       - ``x_text`` rank == ``x_signal`` rank → no arrow crossings by
         construction.
       - Full rail coverage regardless of ``rail_frac`` or signal
         clustering.
       - Even slot spacing = ``rail_width / (n − 1)``.

    4. **Rendering pass**:
       ``rail_id`` is read here for the first time to compute
       ``y = y_text_base + rail_id * y_stack_step``.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Target axes.
    annot_df : pd.DataFrame
        Annotation table. Must contain ``chr_col``, ``"x"`` (cumulative
        genomic position in bp), and ``label_col``.
    chr_col : str
        Column name for chromosome identifiers.
    label_col : str
        Column name for annotation labels.
    offsets : dict
        Mapping of chromosome name → cumulative start offset (bp).
    chr_max : dict
        Mapping of chromosome name → chromosome length (bp).
    spread_width : float, optional
        Genomic window (bp) used for arrow tip jitter. Default 60e6.
    y_text_base : float, optional
        Axes-fraction y-coordinate for rail 0 labels. Default 0.25.
    y_stack_step : float, optional
        Axes-fraction increment per rail. Default 0.1.
    max_rad : float, optional
        Maximum arc curvature for ``FancyArrowPatch``. Default 0.35.
    y_tip : float, optional
        Axes-fraction y-coordinate for arrow tips. Default 0.0.
    fsize : float, optional
        Font size (pt) used to estimate label widths. Default 8.
    rail_frac : float, optional
        Fraction of genome width occupied by the label rail. Default 0.95.
    min_sep : float, optional
        Minimum genomic separation (bp) between any two adjacent label
        centres. Default 6e6.
    asize : float, optional
        Font size (pt) for rendered label text. Default 8.
    """

    # ------------------------------------------------------------------
    # Deduplication
    # ------------------------------------------------------------------
    annot_df = annot_df.drop_duplicates(subset=[chr_col, "x", label_col])

    # ------------------------------------------------------------------
    # Sort annotations
    # ------------------------------------------------------------------
    annot_df = (
        annot_df
        .sort_values(by=[chr_col, "x"], key=natsort_keygen())
        .reset_index(drop=True)
    )

    x_signals = annot_df["x"].to_numpy(dtype=float)
    labels    = annot_df[label_col].astype(str).to_numpy()
    n         = len(x_signals)

    if n == 0:
        return

    # ------------------------------------------------------------------
    # Genome span and rail bounds
    # ------------------------------------------------------------------
    genome_start = min(offsets.values())
    genome_end   = max(offsets[c] + chr_max[c] for c in chr_max)
    genome_width = genome_end - genome_start
    rail_width   = genome_width * rail_frac
    rail_start   = genome_start + (genome_width - rail_width) / 2
    rail_end     = rail_start + rail_width

    # ------------------------------------------------------------------
    # Auto char_width from axes geometry
    # ------------------------------------------------------------------
    # For vertical text (rotation=90°) the horizontal footprint of every
    # label is one character wide regardless of string length.
    try:
        fig         = ax.get_figure()
        renderer    = fig.canvas.get_renderer()
        ax_bbox     = ax.get_window_extent(renderer=renderer)
        xmin, xmax  = ax.get_xlim()
        px_per_bp   = ax_bbox.width / (xmax - xmin)
        char_width  = 0.6 * fsize * (fig.dpi / 72.0) / px_per_bp
    except Exception:
        char_width  = genome_width * 0.01 / fsize

    # ------------------------------------------------------------------
    # 1. Relaxation pass
    # ------------------------------------------------------------------
    # Start from x_signal positions and enforce min_sep between every
    # adjacent pair.  Bidirectional passes (rightward then leftward)
    # distribute pressure symmetrically so labels spread around their
    # signals rather than cascading in one direction.
    x_relaxed      = x_signals.copy()
    max_relax_iter = 100

    for _ in range(max_relax_iter):
        moved = False

        # Rightward pass
        for i in range(1, n):
            gap = x_relaxed[i] - x_relaxed[i - 1]
            if gap < min_sep:
                x_relaxed[i] = x_relaxed[i - 1] + min_sep
                moved = True

        # Leftward pass
        for i in range(n - 2, -1, -1):
            gap = x_relaxed[i + 1] - x_relaxed[i]
            if gap < min_sep:
                x_relaxed[i] = x_relaxed[i + 1] - min_sep
                moved = True

        if not moved:
            break

    # ------------------------------------------------------------------
    # 2. Rail assignment from relaxation drift
    # ------------------------------------------------------------------
    # How far each label drifted from its signal during relaxation is a
    # direct measure of local density: labels in dense regions drift more
    # and should be stacked higher.  We bin the drift into rails using
    # a stride of (rail_width / max_rails) so rails fill proportionally.
    max_rails  = 10
    rail_stride = rail_width / max_rails
    drift       = np.abs(x_relaxed - x_signals)
    rail_ids    = np.clip(
        (drift / rail_stride).astype(int),
        0,
        max_rails - 1,
    )

    # ------------------------------------------------------------------
    # 3. linspace rank-reassignment
    # ------------------------------------------------------------------
    # Sort by x_signal and assign evenly-spaced x_text slots across the
    # full [rail_start, rail_end] range.  This guarantees:
    #   - x_text rank == x_signal rank → no arrow crossings
    #   - Full, even rail coverage regardless of rail_frac
    sig_order = np.argsort(x_signals)
    x_texts   = np.empty(n)
    slots     = (
        np.linspace(rail_start, rail_end, n)
        if n > 1
        else np.array([(rail_start + rail_end) / 2])
    )
    x_texts[sig_order] = slots

    # ------------------------------------------------------------------
    # Build layout table
    # ------------------------------------------------------------------
    layout = pd.DataFrame({
        "label"    : labels,
        "x_signal" : x_signals,
        "x_text"   : x_texts,
        "rail_id"  : rail_ids,
    }).sort_values("x_signal").reset_index(drop=True)

    # ------------------------------------------------------------------
    # 4. Rendering pass — rail_id first used here
    # ------------------------------------------------------------------
    jitter = np.linspace(
        -spread_width * 0.03,
        spread_width * 0.03,
        len(layout),
    )

    for i, row in enumerate(layout.itertuples(index=False)):
        x_sig = row.x_signal
        x_txt = row.x_text
        r_idx = int(row.rail_id)
        label = row.label
        x_tip = x_sig + jitter[i]
        y_txt = y_text_base + r_idx * y_stack_step

        dx  = x_txt - x_sig
        rad = np.clip(
            dx / (genome_width * 0.15),
            -max_rad,
            max_rad,
        )

        if r_idx == 0:
            anglea = 0
            arma   = 0
            armb   = 30
        else:
            anglea = -90
            arma   = 90 * r_idx
            armb   = 30

        arrow = FancyArrowPatch(
            (x_txt, y_txt),
            (x_tip, y_tip),
            arrowstyle="-|>",
            mutation_scale=6,
            lw=0.4,
            color="grey",
            alpha=0.5,
            connectionstyle=(
                f"arc,"
                f"angleA={anglea},"
                f"armA={arma},"
                f"angleB=90,"
                f"armB={armb},"
                f"rad={rad}"
            ),
            transform=ax.transData,
        )
        ax.add_patch(arrow)

        ax.text(
            x_txt + r_idx * spread_width * 0.12,
            y_txt + 0.001,
            str(label),
            rotation=90,
            ha="center",
            va="bottom",
            fontsize=asize,
            clip_on=False,
            color="black",
            fontstyle="italic",
            fontweight="regular",
        )

    # ------------------------------------------------------------------
    # Adaptive ylim
    # ------------------------------------------------------------------
    max_rail = int(layout["rail_id"].max())
    ax.set_ylim(
        y_tip - 0.05,
        y_text_base + (max_rail + 2) * y_stack_step,
    )


def _draw_annotation_arrows_multirail1(
    ax,
    annot_df,
    chr_col: str,
    label_col: str,
    offsets: dict,
    chr_max: dict,
    spread_width: float = 60e6,
    y_text_base: float = 0.25,
    y_stack_step: float = 0.1,
    max_rad: float = 0.35,
    y_tip: float = 0.0,
    fsize: float = 8,
    rail_frac: float = 0.95,
    min_sep: float = 6e6,
    asize: float = 8,
) -> None:
    """
    Dense annotation renderer with relaxation-driven multi-rail
    stacking, linspace rank-reassignment, alternating rail stagger,
    curved arrows, and adaptive ylim.

    Layout pipeline
    ---------------
    1. **Relaxation pass**:
       All labels are sorted by ``x_signal`` and a bidirectional
       relaxation loop enforces ``min_sep`` between every adjacent pair.
       Labels are pushed apart until no two are closer than ``min_sep``.
       The relaxed positions are stored as ``x_relaxed``.

    2. **Rail assignment from relaxation drift**:
       Each label's rail is determined by how far its relaxed position
       drifted from its signal::

           drift    = |x_relaxed − x_signal|
           rail_id  = clip(floor(drift / rail_stride), 0, max_rails − 1)

       Labels in dense regions drift more and receive higher rail
       indices proportionally.  ``rail_stride = rail_width / max_rails``
       so rail assignment scales correctly with ``rail_frac``.

    3. **linspace rank-reassignment**:
       Labels are sorted by ``x_signal`` and assigned evenly-spaced
       ``x_text`` slots via ``np.linspace(rail_start, rail_end, n)``.
       This guarantees:

       - ``x_text`` rank == ``x_signal`` rank → no arrow crossings by
         construction.
       - Full rail coverage regardless of ``rail_frac`` or signal
         clustering.
       - Even slot spacing = ``rail_width / (n − 1)``.

    4. **Cyclic rail stagger (k_min-computed)**:
       The minimum number of rails required to avoid horizontal slot
       overlap is computed from the actual slot interval and
       ``char_width``, with a 1.5× safety factor to account for
       underestimation of rendered glyph widths by the ``0.6 * fsize``
       approximation::

           slot_interval  = rail_width / (n − 1)
           k_min          = ceil(1.5 * char_width / slot_interval)

       Labels are then assigned a stagger offset of ``rank % k_min``
       (cycling through 0, 1, …, k_min−1 in left-to-right
       ``x_signal`` order) which is added to their drift-assigned
       ``rail_id``.  This guarantees same-rail neighbours are at least
       ``k_min * slot_interval >= 1.5 * char_width`` apart.  At lower
       ``rail_frac`` or larger ``fsize``, ``slot_interval`` shrinks and
       ``k_min`` grows automatically, creating more rails as needed.

    5. **Rendering pass**:
       ``rail_id`` is read here for the first time to compute
       ``y = y_text_base + rail_id * y_stack_step``.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Target axes.
    annot_df : pd.DataFrame
        Annotation table. Must contain ``chr_col``, ``"x"`` (cumulative
        genomic position in bp), and ``label_col``.
    chr_col : str
        Column name for chromosome identifiers.
    label_col : str
        Column name for annotation labels.
    offsets : dict
        Mapping of chromosome name → cumulative start offset (bp).
    chr_max : dict
        Mapping of chromosome name → chromosome length (bp).
    spread_width : float, optional
        Genomic window (bp) used for arrow tip jitter. Default 60e6.
    y_text_base : float, optional
        Axes-fraction y-coordinate for rail 0 labels. Default 0.25.
    y_stack_step : float, optional
        Axes-fraction increment per rail. Default 0.1.
    max_rad : float, optional
        Maximum arc curvature for ``FancyArrowPatch``. Default 0.35.
    y_tip : float, optional
        Axes-fraction y-coordinate for arrow tips. Default 0.0.
    fsize : float, optional
        Font size (pt) used to estimate label widths. Default 8.
    rail_frac : float, optional
        Fraction of genome width occupied by the label rail. Default 0.95.
    min_sep : float, optional
        Minimum genomic separation (bp) between any two adjacent label
        centres. Default 6e6.
    asize : float, optional
        Font size (pt) for rendered label text. Default 8.
    """

    # ------------------------------------------------------------------
    # Deduplication
    # ------------------------------------------------------------------
    annot_df = annot_df.drop_duplicates(subset=[chr_col, "x", label_col])

    # ------------------------------------------------------------------
    # Sort annotations
    # ------------------------------------------------------------------
    annot_df = (
        annot_df
        .sort_values(by=[chr_col, "x"], key=natsort_keygen())
        .reset_index(drop=True)
    )

    x_signals = annot_df["x"].to_numpy(dtype=float)
    labels    = annot_df[label_col].astype(str).to_numpy()
    n         = len(x_signals)

    if n == 0:
        return

    # ------------------------------------------------------------------
    # Genome span and rail bounds
    # ------------------------------------------------------------------
    genome_start = min(offsets.values())
    genome_end   = max(offsets[c] + chr_max[c] for c in chr_max)
    genome_width = genome_end - genome_start
    rail_width   = genome_width * rail_frac
    rail_start   = genome_start + (genome_width - rail_width) / 2
    rail_end     = rail_start + rail_width

    # ------------------------------------------------------------------
    # Auto char_width from axes geometry
    # ------------------------------------------------------------------
    # For vertical text (rotation=90°) the horizontal footprint of every
    # label is one character wide regardless of string length.
    try:
        fig         = ax.get_figure()
        renderer    = fig.canvas.get_renderer()
        ax_bbox     = ax.get_window_extent(renderer=renderer)
        xmin, xmax  = ax.get_xlim()
        px_per_bp   = ax_bbox.width / (xmax - xmin)
        char_width  = 0.6 * fsize * (fig.dpi / 72.0) / px_per_bp
    except Exception:
        char_width  = genome_width * 0.01 / fsize

    # ------------------------------------------------------------------
    # 1. Relaxation pass
    # ------------------------------------------------------------------
    # Start from x_signal positions and enforce min_sep between every
    # adjacent pair.  Bidirectional passes (rightward then leftward)
    # distribute pressure symmetrically so labels spread around their
    # signals rather than cascading in one direction.
    x_relaxed      = x_signals.copy()
    max_relax_iter = 50

    for _ in range(max_relax_iter):
        moved = False

        # Rightward pass
        for i in range(1, n):
            gap = x_relaxed[i] - x_relaxed[i - 1]
            if gap < min_sep:
                x_relaxed[i] = x_relaxed[i - 1] + min_sep
                moved = True

        # Leftward pass
        for i in range(n - 2, -1, -1):
            gap = x_relaxed[i + 1] - x_relaxed[i]
            if gap < min_sep:
                x_relaxed[i] = x_relaxed[i + 1] - min_sep
                moved = True

        if not moved:
            break

    # ------------------------------------------------------------------
    # 2. Rail assignment from relaxation drift
    # ------------------------------------------------------------------
    # How far each label drifted from its signal during relaxation is a
    # direct measure of local density: labels in dense regions drift more
    # and should be stacked higher.  We bin the drift into rails using
    # a stride of (rail_width / max_rails) so rails fill proportionally.
    max_rails   = 10
    rail_stride = rail_width / max_rails
    drift       = np.abs(x_relaxed - x_signals)
    rail_ids    = np.clip(
        (drift / rail_stride).astype(int),
        0,
        max_rails - 1,
    )

    # ------------------------------------------------------------------
    # 3. linspace rank-reassignment
    # ------------------------------------------------------------------
    # Sort by x_signal and assign evenly-spaced x_text slots across the
    # full [rail_start, rail_end] range.  This guarantees:
    #   - x_text rank == x_signal rank → no arrow crossings
    #   - Full, even rail coverage regardless of rail_frac
    sig_order = np.argsort(x_signals)
    x_texts   = np.empty(n)
    slots     = (
        np.linspace(rail_start, rail_end, n)
        if n > 1
        else np.array([(rail_start + rail_end) / 2])
    )
    x_texts[sig_order] = slots

    # ------------------------------------------------------------------
    # 4. Compute minimum rails required (k_min) and apply cyclic stagger
    # ------------------------------------------------------------------
    # With n labels evenly spaced across rail_width, the slot interval is:
    #   slot_interval = rail_width / (n - 1)
    #
    # With a cyclic stagger of k rails, same-rail neighbours are k slots
    # apart, giving a same-rail interval of k * slot_interval.
    # The no-overlap condition requires:
    #   k * slot_interval >= char_width
    #   → k_min = ceil(char_width / slot_interval)
    #
    # A safety factor of 1.5 is applied to char_width to account for
    # the fact that 0.6 * fsize underestimates the true rendered glyph
    # width, which varies by font and renderer.  Without this correction
    # k_min is systematically too small at larger font sizes and lower
    # rail_frac values, causing labels to still visually overlap even
    # after staggering.
    #
    # The stagger cycles through k_min values (0, 1, …, k_min-1) in
    # left-to-right x_signal order.  The drift-based rail_id is used as
    # a base elevation and the stagger offset is added on top, so dense
    # regions are still elevated relative to sparse ones while adjacent
    # labels are guaranteed to be on different rails.
    slot_interval      = rail_width / max(n - 1, 1)
    char_width_safe    = char_width * 3
    k_min              = max(1, int(np.ceil(char_width_safe / slot_interval)))

    for rank, idx in enumerate(sig_order):
        stagger         = rank % k_min
        rail_ids[idx]   = min(rail_ids[idx] + stagger, max_rails - 1)

    # ------------------------------------------------------------------
    # Build layout table
    # ------------------------------------------------------------------
    layout = pd.DataFrame({
        "label"    : labels,
        "x_signal" : x_signals,
        "x_text"   : x_texts,
        "rail_id"  : rail_ids,
    }).sort_values("x_signal").reset_index(drop=True)

    # ------------------------------------------------------------------
    # 4. Rendering pass — rail_id first used here
    # ------------------------------------------------------------------
    jitter = np.linspace(
        -spread_width * 0.03,
        spread_width * 0.03,
        len(layout),
    )

    for i, row in enumerate(layout.itertuples(index=False)):
        x_sig = row.x_signal
        x_txt = row.x_text
        r_idx = int(row.rail_id)
        label = row.label
        x_tip = x_sig + jitter[i]
        y_txt = y_text_base + r_idx * y_stack_step

        dx  = x_txt - x_sig
        rad = np.clip(
            dx / (genome_width * 0.15),
            -max_rad,
            max_rad,
        )

        if r_idx == 0:
            anglea = 0
            arma   = 0
            armb   = 30
        else:
            anglea = -90
            arma   = 90 * r_idx
            armb   = 30

        arrow = FancyArrowPatch(
            (x_txt, y_txt),
            (x_tip, y_tip),
            arrowstyle="-|>",
            mutation_scale=6,
            lw=0.4,
            color="grey",
            alpha=0.5,
            connectionstyle=(
                f"arc,"
                f"angleA={anglea},"
                f"armA={arma},"
                f"angleB=90,"
                f"armB={armb},"
                f"rad={rad}"
            ),
            transform=ax.transData,
        )
        ax.add_patch(arrow)

        ax.text(
            x_txt + r_idx * spread_width * 0.12,
            y_txt + 0.001,
            str(label),
            rotation=90,
            ha="center",
            va="bottom",
            fontsize=asize,
            clip_on=False,
            color="black",
            fontstyle="italic",
            fontweight="regular",
        )

    # ------------------------------------------------------------------
    # Adaptive ylim
    # ------------------------------------------------------------------
    max_rail = int(layout["rail_id"].max())
    ax.set_ylim(
        y_tip - 0.05,
        y_text_base + (max_rail + 2) * y_stack_step,
    )



# ---------------------------------------------------------------------------
# Public function
# ---------------------------------------------------------------------------
[docs] def plot_linearm( tracks: list, track_labels: Optional[list[str]] = None, annot_df: pd.DataFrame = None, annotate: bool = False, annotation_size: float = 8, highlight: bool = False, highlight_color: str = 'brown', highlight_line: bool = False, highlight_line_color: str = 'grey', logp: bool = True, label_col: Optional[str] = 'SNP', chr_order: Optional[list[str]] = None, chr_spacing: float = 9e6, track_heights: Optional[list[float]] = None, linear_track_spacing: float = 0.10, annot_rail_frac: float = 0.95, point_size: float = 8, colors: Optional[list[str]] = ['steelblue','silver'], sig_lines: Optional[list[dict]] = None, plt_name: Optional[str] = None, no_track_labels: bool = False, ylabel: Optional[str] = None, fig_format: Optional[str] = None, dpi: int = 300, figsize: Optional[list[float]] = [10, 4], ): """Core rendering engine for the multi-track stacked linear Manhattan plot. Builds a :class:`~matplotlib.figure.Figure` with one annotation sub-panel at the top (for gene/SNP labels and connecting arrows) and *n* data tracks below it, one per element of *tracks*. All tracks share the same cumulative x-axis. This function is called by the higher-level :func:`plot_linear` wrapper and is not normally invoked directly. Parameters ---------- tracks : list of pandas.DataFrame One DataFrame per GWAS trait. Each must have columns ``CHR``, ``POS``, ``P``, and ``logP`` (when *logp* is ``True``). The DataFrames are pre-processed internally (chromosome normalisation, highlighting, cumulative x-axis computation). track_labels : list of str, optional Y-axis label for each track, in the same order as *tracks*. Defaults to ``['Track 1', 'Track 2', …]``. annot_df : pandas.DataFrame, optional Annotation DataFrame (typically the hits summary table). Must have columns ``CHR``, ``POS``, and *label_col*. When provided, a dashed vertical guide line is drawn through every annotated position across all tracks, and gene/SNP label arrows are drawn in the top sub-panel. highlight : bool, optional If ``True``, variants within 500 kb of a lead SNP (as determined by :func:`~pycmplot.stats.get_highlight_snps`) are rendered in brown. Default ``False``. highlight_thresh : float, optional P-value threshold used for locus highlighting. Default ``5e-8``. trim_pval : float, optional Reserved for future use; trimming is currently handled upstream in :func:`~pycmplot.io.get_sumstats_and_merged_sector_list`. logp : bool, optional If ``True``, plot –log₁₀(p) on the y-axis; requires a ``logP`` column in each DataFrame. Default ``True``. label_col : str, optional Column in *annot_df* to use as annotation text labels (e.g. ``'top_gene'``). Default ``'label'``. chr_order : list of str, optional Chromosome display order. Defaults to :data:`~pycmplot.constants.CHROM_ORDER` (autosomes 1–22, X, Y, MT). chr_spacing : float, optional Gap in base-pairs inserted between consecutive chromosomes on the x-axis. Default ``9e6``. track_heights : list of float, optional Relative height ratios for the gridspec rows. The first element controls the annotation sub-panel; subsequent elements control the data tracks. When ``None``, the annotation panel is given a weight of 1 and each data track a weight of 3. linear_track_spacing : float, optional Vertical ``hspace`` between tracks as a fraction of average track height. Default ``0.10``. annot_rail_frac : float, optional Fraction of horizontal space covering the center of the annotation track within which to place annotation texts. Default ``0.98`` (annotation texts will cover 98% of annotation track horizontally) point_size : float, optional Scatter-plot point size passed to :func:`matplotlib.axes.Axes.scatter`. Default ``5``. colors : list of str, optional Two alternating matplotlib colour strings for even/odd chromosomes. Default ``['steelblue', 'orange']``. sig_lines : list of dict, optional One ``{'genome': float, 'suggestive': float}`` dict per track. ``'genome'`` values are drawn as red dashed lines; ``'suggestive'`` values as grey dashed lines. plt_name : str, optional Full output file path (including extension). When provided the figure is saved to disk. no_track_labels : bool, optional If ``True``, per-track labels are suppressed. Default ``False``. ylabel : str, optional Shared y-axis label rendered on the left of the figure. Use this to set a sensible label for non-p-value statistics such as iHS, FST or XP-EHH (e.g. ``ylabel="iHS"``). When ``None`` (the default), the label is derived automatically: ``"-log₁₀(p-value)"`` when *logp* is ``True``, otherwise the p-value column name (``"P"``). fig_format : str, optional Output image format (``'png'``, ``'pdf'``, ``'svg'``). Inferred from *plt_name*'s extension when ``None``. dpi : int, optional Output resolution in dots per inch. Default ``300``. figsize : tuple of (float, float), optional Figure dimensions ``(width, height)`` in inches. Default ``(15, 9)``. Returns ------- fig : matplotlib.figure.Figure The completed figure. axes : list of matplotlib.axes.Axes All axes in the figure: ``axes[0]`` is the annotation sub-panel; ``axes[1:]`` are the data-track axes in the same order as *tracks*. See Also -------- plot_linear : User-facing wrapper that extracts DataFrames from the ``sumstats_loaded`` dict and resolves output paths before calling this function. _draw_annotation_arrows : Called internally to render gene/SNP label arrows in ``axes[0]``. pycmplot.stats.get_highlight_snps : Called internally when *highlight* is ``True``. """ chr_col = "CHR" pos_col = "POS" p_col = "P" annot_df = annot_df.drop_duplicates(subset=[chr_col, pos_col, label_col]) if chr_order is None: chr_order = CHROM_ORDER chr_to_idx = {c: i for i, c in enumerate(chr_order)} # ------------------------------------------------------------------ # Prep DataFrames # ------------------------------------------------------------------ def _prep(df): df = df.copy() # Fast path: io.py's loader stores CHR as a Categorical with # ``CHROM_ORDER`` as the canonical category set. When that's the # case we can derive ``chr_idx`` from ``cat.codes`` directly, # skipping ~0.7 s of redundant string normalisation on a 500K-row # frame (and proportionally more on larger frames). chr_series = df[chr_col] is_canonical_cat = ( isinstance(chr_series.dtype, pd.CategoricalDtype) and list(chr_series.cat.categories) == list(chr_order) ) if is_canonical_cat: df["chr_idx"] = chr_series.cat.codes.to_numpy() else: # Defensive normalisation for callers that bypass io.py (e.g. # users feeding hand-built DataFrames straight to plot_linearm). df[chr_col] = ( chr_series .astype(str) .str.replace("chr", "", regex=False) .str.upper() .replace({"23": "X", "24": "Y", "M": "MT", "MTDNA": "MT"}) ) df = df[df[chr_col].isin(chr_order)] df["chr_idx"] = df[chr_col].map(chr_to_idx) return df.sort_values(["chr_idx", pos_col]) tracks = [_prep(df) for df in tracks] chr_maxes = [] for df in tracks: # ``observed=True`` skips categorical levels with no rows in this # particular track; ``s.get(c, 0)`` below handles those cases. # Without this, pandas returns ``NA`` for the empty groups, which # later propagates as ``TypeError: boolean value of NA is # ambiguous`` in the ``xmax = max(...)`` reduction. chr_maxes.append( df.groupby(chr_col, observed=True)[pos_col].max() ) if annot_df is not None: annot_df = _prep(annot_df) # ------------------------------------------------------------------ # Cumulative x-axis positions # ------------------------------------------------------------------ chr_max: dict[str, float] = {} offsets: dict[str, float] = {} offset = 0.0 for c in chr_order: max_pos = max( [s.get(c, 0) for s in chr_maxes] ) chr_max[c] = max_pos offsets[c] = offset offset += max_pos + chr_spacing def _add_cum(df): df = df.copy() df["x"] = ( df[pos_col].to_numpy() + df[chr_col].map(offsets).to_numpy() ) return df tracks = [_add_cum(df) for df in tracks] if annot_df is not None: #annot_df = _add_cum(annot_df) x_lookup = pd.concat([ df[[chr_col, pos_col, 'LABEL', "x"]] for (label, df) in zip(track_labels, tracks) ]) annot_df = annot_df.merge( x_lookup, on=[chr_col, pos_col, 'LABEL'], how="left" ) # ------------------------------------------------------------------ # Figure layout # ------------------------------------------------------------------ n_tracks = len(tracks) # ------------------------------------------------------------------ # Track heights sanity check # ------------------------------------------------------------------ # When annotating, track_heights must have n_tracks + 1 values: # one for the annotation track (first/top) and one per data track. # When not annotating, track_heights must have exactly n_tracks values. expected_n = n_tracks + 1 if annotate else n_tracks if track_heights is None: track_heights = ([1] + [3] * n_tracks) if annotate else ([3] * n_tracks) else: try: if len(track_heights) != expected_n: raise ValueError( f"track_heights has {len(track_heights)} values but " f"{expected_n} are required" + (" (one extra for the annotation track)" if annotate else "") ) except Exception: track_heights = [float(x) for x in track_heights] # ------------------------------------------------------------------ # y-label position # ------------------------------------------------------------------ # Tracks are laid out top-to-bottom, so the annotation track occupies # the top portion of the axes and data tracks fill the remainder below. # The y-label (-log10(P)) should sit at the vertical midpoint of the # data track region in axes-fraction coordinates (0 = bottom, 1 = top). # # Data tracks span [0, data_total/total_height] from the bottom, # so their midpoint in axes-fraction is data_total / (2 * total_height). total_height = sum(track_heights) data_heights = track_heights[1:] if annotate else track_heights data_total = sum(data_heights) y_lab_pos = data_total / (2 * total_height) fig = plt.figure(figsize=figsize) gs = fig.add_gridspec( expected_n, 1, height_ratios=track_heights, hspace=linear_track_spacing, ) if annotate: ax_annot = fig.add_subplot(gs[0, 0]) axes = [ax_annot] for i in range(n_tracks): axes.append(fig.add_subplot(gs[i + 1, 0], sharex=ax_annot)) else: first_ax = fig.add_subplot(gs[0, 0]) axes = [first_ax] for i in range(n_tracks-1): axes.append(fig.add_subplot(gs[i+1, 0], sharex=first_ax)) # Per-track highlight colours from tab20 colormap cmap = plt.get_cmap("tab20") hex_colors = [mcolors.to_hex(cmap(i / n_tracks)) for i in range(n_tracks)] # ------------------------------------------------------------------ # Plot data tracks # ------------------------------------------------------------------ t_labels = track_labels or [f"Track {i+1}" for i in range(n_tracks)] if annotate: loop_axes = axes[1:] else: loop_axes = axes left_pad = chr_spacing * 0.2 xmax = max(offsets[c] + chr_max[c] for c in chr_order) for i, (ax, df, t_label, h_color) in enumerate( zip(loop_axes, tracks, t_labels, hex_colors) ): logger.info("Plotting: %s ...", t_label) # Sanity filter. When plotting -log10(p) we assume the P column is a # p-value and drop non-positive entries (a common product of upstream # NaN -> 0 coercion or mistyped headers). When plotting raw values # we keep every ro(w, since selection statistics such as iHS, XP-EHH # or Fay & Wu's H can legitimately be negative. color_cycle is # computed *after* filtering so the color array always aligns with # the scatter x/y arrays. if logp: df = df[df[p_col] >= 0] # Render markers via ``ax.plot`` (one Line2D per chromosome) rather # than ``ax.scatter`` (which builds a PathCollection: one Path per # point, with a per-point ``should_simplify`` check and a 4×N RGBA # array). At 10 M variants this cuts the matplotlib draw step from # several minutes to a few seconds while producing visually # identical output (the rasterised PNG/PDF backend treats the # vertices the same way either way). Marker size is converted from # the scatter ``s`` unit (points²) to the plot ``markersize`` unit # (points) via ``sqrt`` so existing point-size values keep meaning. y_col = "logP" if logp else p_col x_all = df["x"].to_numpy() y_all = df[y_col].to_numpy() chr_idx_all = df["chr_idx"].to_numpy() # ``point_size`` is in scatter units (points²); convert to plot's # markersize (points) so existing user-supplied values still mean # the same visual size. Guard against ``None`` propagated from # callers that don't set the parameter explicitly. _ps = 4.0 if point_size is None else float(point_size) markersize = float(np.sqrt(_ps)) for i_chr in np.unique(chr_idx_all): mask = chr_idx_all == i_chr ax.plot( x_all[mask], y_all[mask], marker="o", linestyle="none", color=colors[int(i_chr) % len(colors)], markersize=markersize, markeredgecolor="none", rasterized=True, ) if y_all.size: ax.set_ylim(y_all.min(), y_all.max()+2) # Track labels — rendered vertically in the right margin (just past # the last chromosome), orthogonal to the track-stacking direction. # This keeps the label out of the data region, survives arbitrarily # tight packing (``linear_track_spacing=0`` onward), and never # collides with the shared y-axis label on the left. if not no_track_labels: ax.text( 1.005, 0.5, t_label, transform=ax.transAxes, ha="left", va="center", rotation=-90, fontsize=10, ) if highlight: sig = df[df["in_locus"]] if not sig.empty: sig_y = sig["logP"] if logp else sig[p_col] # Highlight points need an explicit ``zorder`` higher than # the background scatter. The background is drawn with # ``ax.plot`` (Line2D, default zorder=2), while # ``ax.scatter`` returns a PathCollection whose default # zorder is **1**. Without the explicit bump below the # highlight ends up *underneath* the background despite # being called later in code. ``zorder=3`` puts the # highlight cleanly on top of both background scatter and # any axhline / axvline guides. ax.scatter( sig["x"].to_numpy(), sig_y.to_numpy(), s=point_size, marker="o", color=highlight_color, edgecolors="none", zorder=3, rasterized=True, ) # Vertical lines across all data tracks at highlight positions if highlight_line: for x in sig["x"].values: for _ax in loop_axes: _ax.axvline(x, color=highlight_line_color, alpha=0.1, linewidth=0.2, linestyle="--", zorder=0) if sig_lines is not None and i < len(sig_lines): sl = sig_lines[i] if "genome" in sl: ax.axhline(y=sl["genome"], color="orangered", linestyle="--", linewidth=0.5) if "suggestive" in sl: ax.axhline(y=sl["suggestive"], color="navy", linestyle="--", linewidth=0.5) ax.spines[["top", "right"]].set_visible(False) ax.set_xlim(-left_pad, xmax) # ------------------------------------------------------------------ # Annotation track # ------------------------------------------------------------------ if annotate and annot_df is not None: less_than_spread_width = [] s_width = 20e6 for chr, df in annot_df.groupby(chr_col): df_chr = df[df[chr_col]==chr] differences = np.diff(df_chr['POS']).tolist() less_than_spread_width.append(list(filter(lambda x: x < s_width, differences))) less_than_spread_width = [l for l in less_than_spread_width if not len(l) == 0] if len(less_than_spread_width) < 5: _draw_annotation_arrows( ax_annot, annot_df, chr_col=chr_col, label_col=label_col, offsets=offsets, chr_max=chr_max, isolation_threshold=80e6, stack_threshold=10e6, y_tip=0.0, y_text_base=0.3, y_stack_step=0.17, spread_width=60e6, asize=annotation_size, ) else: _draw_annotation_arrows_multirail( ax_annot, annot_df, chr_col=chr_col, label_col=label_col, offsets=offsets, chr_max=chr_max, spread_width=s_width, asize=annotation_size, rail_frac=annot_rail_frac, y_tip=0.0, y_text_base=0.3, y_stack_step=0.17, min_sep=6e6, ) ax_annot.set_ylim(0, 1) ax_annot.axis("off") # ------------------------------------------------------------------ # Chromosome labels on x-axis # ------------------------------------------------------------------ xticks, xlabels = [], [] for c in chr_order: if chr_max[c] == 0: continue start = offsets[c] end = offsets[c] + chr_max[c] mid = (start + end) / 2 xticks.append(mid) xlabels.append(c) for ax in axes: ax.axvline(end, color="lightgray", linewidth=0.1, alpha=0.05) axes[-1].set_xticks(xticks) axes[-1].set_xticklabels(xlabels) axes[-1].set_xlabel("Chromosome", fontsize=12) for ax in axes[:-1]: ax.tick_params(axis="x", which="both", bottom=False, labelbottom=False) ax.spines[["top", "right", "bottom"]].set_visible(False) # Explicit margins keep the shared y-axis label (added below as # ``fig.text``) in its own reserved strip on the left of the figure. # We avoid ``tight_layout`` here because the annotation panel shares # ``sharex`` with the data tracks inside a custom ``GridSpec``, which # triggers a matplotlib "not compatible with tight_layout" warning # without actually helping the layout. # Reserve margins explicitly: the left strip holds the shared y-axis # label, the right strip holds the per-track labels placed just past # the last chromosome. ``hspace`` is forwarded verbatim so users can # set ``linear_track_spacing=0`` to stack tracks flush against each # other — the right-side track labels remain readable because they # are orthogonal to the stacking direction. fig.subplots_adjust( left=0.09, right=0.94, top=0.90, bottom=0.15, hspace=linear_track_spacing, ) if ylabel is None: ylabel_text = "-log\u2081\u2080(P)" if logp else p_col else: ylabel_text = ylabel fig.text( 0.025, y_lab_pos, ylabel_text, va="center", rotation="vertical", fontsize=12, ) if plt_name: fmt = fig_format or Path(plt_name).suffix.lstrip(".") or "png" plt.savefig(plt_name.lower(), format=fmt, dpi=dpi) logger.info("Saved linear Manhattan plot: %s", plt_name.lower()) return fig, axes
[docs] def plot_linear( sumstats_loaded: list[str], track_heights: list[float] = None, logp: bool = False, point_size: Optional[float] = 8, highlight: bool = False, highlight_color: str = 'brown', highlight_line: bool = False, highlight_line_color: str = 'grey', hits_table: Optional[pd.DataFrame] = None, annotate: str = None, annotation_size: float = 8, label_col: Optional[str] = None, chr_spacing: Optional[float] = 9e6, linear_track_spacing: Optional[float] = None, annot_rail_frac: Optional[float] = 0.98, colors: list[str] = ['steelblue','silver'], signif_lines: Optional[dict] = None, plot_title: Optional[str] = None, no_track_labels: bool = False, ylabel: Optional[str] = None, dpi: Optional[int] = None, output_format: Optional[str] = 'png', output_dir: Optional[str] = '.', figsize: Optional[list[float]] = [10, 4], ): """Generate a multi-track stacked linear Manhattan plot. This is the primary user-facing entry point for linear Manhattan plots. It extracts DataFrames and labels from *sumstats_loaded*, resolves output file paths, then delegates all rendering to :func:`plot_linearm`. Parameters ---------- sumstats_loaded : dict Mapping of ``label → [DataFrame, n_chroms]`` as returned by :func:`~pycmplot.io.get_sumstats_and_merged_sector_list`. One track is created per key; the DataFrames must have canonical columns ``CHR``, ``POS``, ``P``, and ``logP`` (when *logp* is ``True``). trim_pval : float, optional Reserved; trimming is applied upstream. Default ``None``. track_heights : list of float, optional Comma-parsed relative track heights (e.g. ``[2.0, 2.0, 1.5]``). Passed directly to :func:`plot_linearm` as the gridspec height ratios for the data tracks. When ``None``, all tracks are equal. logp : bool, optional Plot –log₁₀(p) on the y-axis. Default ``False``. point_size : float, optional Scatter-plot point size. Default uses :func:`plot_linearm`'s default (``5``). highlight : bool, optional Render significant-locus variants in brown. Default ``False``. highlight_thresh : float, optional P-value threshold for locus highlighting. Default ``5e-8``. hits_table : pandas.DataFrame, optional Annotation DataFrame (hits summary table from :func:`~pycmplot.annotation.get_hits_summary_table`). Must contain columns ``CHR``, ``POS``, and *label_col*. Passed to DataFrame suppresses annotations. label_col : str, optional Column in *hits_table* to use as annotation text (e.g. ``'top_gene'``). Default ``None`` (falls back to :func:`plot_linearm` default ``'label'``). chr_spacing : float, optional Horizontal gap between chromosomes in base-pairs. Default ``9e6``. linear_track_spacing : float, optional Vertical space between tracks as a fraction of average track height. Default ``0.10``. annot_rail_frac : float, optional Fraction of horizontal space covering the center of the annotation track within which to place annotation texts. Default ``0.98`` (annotation texts will cover 98% of annotation track horizontally) colors : list of str, optional Two alternating chromosome colours. Default ``['steelblue', 'silver']``. signif_lines : list of dict, optional One ``{'genome': float, 'suggestive': float}`` dict per track, in the same order as *sumstats_loaded*. Produced by :func:`~pycmplot.io.get_sumstats_and_merged_sector_list`. plot_title : str, optional Human-readable title used as the output file-name stem. Passed to :func:`~pycmplot.io.get_output_paths`. no_track_labels : bool, optional Suppress per-track labels. Default ``False``. ylabel : str, optional Override the shared y-axis label (left margin). Useful for non-p-value statistics such as iHS, F_ST or XP-EHH (e.g. ``ylabel="iHS"``). When ``None`` (the default), the label is ``"-log₁₀(p-value)"`` if *logp* is ``True`` and ``"P"`` otherwise. dpi : int, optional Output resolution in dots per inch. Default ``300``. output_format : str, optional Image format (``'png'``, ``'pdf'``, ``'svg'``, ``'jpg'``). Default ``'png'``. output_dir : str or pathlib.Path, optional Directory in which to save the output files. Default ``'.'``. figsize : tuple of (float, float), optional Figure dimensions ``(width, height)`` in inches. Default ``(15, 9)``. Returns ------- fig : matplotlib.figure.Figure The completed figure. axes : list of matplotlib.axes.Axes All axes: ``axes[0]`` is the annotation sub-panel; ``axes[1:]`` are the per-track data axes. See Also -------- plot_linearm : The underlying rendering engine called by this function. pycmplot.io.get_sumstats_and_merged_sector_list : Produces *sumstats_loaded* and *signif_lines*. Examples -------- >>> from pycmplot.plotting.linear import plot_linear >>> fig, axes = plot_linear( ... sumstats_loaded=loaded, ... logp=True, ... highlight=True, ... hits_table=hits, ... label_col="top_gene", ... signif_lines=sig_lines, ... plot_title="RBC_Traits", ... output_dir="./results", ... ) """ dfs = [v[0] for v in sumstats_loaded.values()] t_labels = list(sumstats_loaded.keys()) label = 'SNP' if annotate: label = get_annotation_column( annotate=annotate, hits_table=hits_table, label_col=label_col ) # plot name ( plt_name, table_out, plt_base, ) = get_output_paths( labels = t_labels, mode='lm', logp=logp, output_dir=output_dir, plot_title=plot_title, output_format=output_format ) fig, axes = plot_linearm( tracks=dfs, track_labels=t_labels, logp=True if logp else False, point_size=point_size, highlight=highlight, highlight_color = highlight_color, highlight_line = highlight_line, highlight_line_color = highlight_line_color, annotate=annotate, annotation_size=annotation_size, annot_df=hits_table if hits_table is not None and not hits_table.empty else None, label_col=label, chr_spacing=chr_spacing, track_heights=track_heights, linear_track_spacing=linear_track_spacing, annot_rail_frac=annot_rail_frac, colors=colors, sig_lines=signif_lines, plt_name=plt_name, no_track_labels = no_track_labels, ylabel=ylabel, dpi=dpi, fig_format=output_format, figsize=figsize, ) return axes