Source code for aigct.report_util

import pandas as pd


class GeneMetricSorter:
    """
    Sort gene metric dataframes into a consistent order.

    Rows are ordered ascending by the variant effect source column and,
    within each source, descending by the number of unique variants per
    gene. The per-gene counts are supplied once at construction time so
    every dataframe passed to ``sort_gene_metrics`` is ordered against the
    same counts.
    """

    # Temporary column used to carry the per-gene count through the merge.
    # A private name is used so the merge never collides with a column the
    # caller's dataframe already has (e.g. its own NUM_UNIQUE_VARIANTS).
    _SORT_KEY = "__GENE_VARIANT_COUNT_SORT_KEY__"

    def __init__(self, gene_variant_counts: pd.DataFrame,
                 num_top_genes: int = None):
        """
        Parameters
        ----------
        gene_variant_counts : pd.DataFrame
            Dataframe with columns GENE_SYMBOL, NUM_UNIQUE_VARIANTS
        num_top_genes : int, optional
            If specified, only keep the top N genes by number of unique
            variants. If None (the default), the dataframe is stored as
            given.
        """
        if num_top_genes is not None:
            # Keep only the N rows with the largest variant counts.
            self._gene_variant_counts = gene_variant_counts.sort_values(
                by="NUM_UNIQUE_VARIANTS", ascending=False
            ).iloc[:num_top_genes]
        else:
            self._gene_variant_counts = gene_variant_counts

    def sort_gene_metrics(self, gene_metrics: pd.DataFrame) -> pd.DataFrame:
        """
        Return ``gene_metrics`` restricted to the known genes and sorted.

        Parameters
        ----------
        gene_metrics : pd.DataFrame
            Dataframe with a GENE_SYMBOL column and a source column named
            either SCORE_SOURCE or SOURCE_NAME (SCORE_SOURCE is used when
            present).

        Returns
        -------
        pd.DataFrame
            A new dataframe with the same columns as ``gene_metrics``, in
            the same column order. Rows whose GENE_SYMBOL is not in the
            stored gene variant counts are dropped (inner join). Remaining
            rows are sorted by the source column ascending, then by the
            stored NUM_UNIQUE_VARIANTS descending.
        """
        cols = gene_metrics.columns
        if "SCORE_SOURCE" in cols:
            score_source_col = "SCORE_SOURCE"
        else:
            score_source_col = "SOURCE_NAME"

        # Merge only the join key and the count, with the count renamed to
        # a private key. Merging the full counts frame would make pandas
        # suffix any overlapping column (_x/_y), after which both the sort
        # on NUM_UNIQUE_VARIANTS and the final column selection fail.
        sort_key = self._SORT_KEY
        counts = self._gene_variant_counts[
            ["GENE_SYMBOL", "NUM_UNIQUE_VARIANTS"]
        ].rename(columns={"NUM_UNIQUE_VARIANTS": sort_key})

        merged = gene_metrics.merge(counts, how="inner", on="GENE_SYMBOL")
        ordered = merged.sort_values(
            by=[score_source_col, sort_key], ascending=[True, False]
        )
        # Drop the temporary key; return exactly the caller's columns.
        return ordered[cols]