Commit c2efcb08 authored by Julien Boccard's avatar Julien Boccard
Browse files

Update volcano/__init__.py, volcano/comparator.py, volcano/plot.py files

parent 537c998e
from .comparator import ttest
\ No newline at end of file
import numpy as np
import scipy.stats as st
from ..io import filters
from .plot import plot_static, plot_interactive
def ttest(*args, **kwargs):
return Comparator(*args, **kwargs)
class Comparator:
def __init__(s, intensity_threshold, fold_threshold, min_pvalue, min_log_fold):
s.intensity_threshold = intensity_threshold
s.fold_threshold = fold_threshold
s.min_pvalue = min_pvalue
s.min_log_fold = min_log_fold
def run(s, datatable, group_property, control_group, exclude_groups=None):
if exclude_groups is None:
exclude_groups = []
return ComparisonSet(datatable, group_property, s, control_group, exclude_groups)
class ComparisonSet:
def __init__(s, datatable, group_property, comparator, control_group, exclude_groups):
group_names = datatable.observation_metadata[group_property].select().values
s.comparison_groups = list(set(group_names) - set([control_group]) - set(exclude_groups))
s.comparisons = [
Comparison(datatable, group_property, control_group, target_group, comparator)
for target_group in s.comparison_groups
]
def __iter__(s):
return s.comparisons.__iter__()
def __getitem__(s, index):
try:
return s.comparisons[index]
except TypeError:
for c in s.comparisons:
if c.target_group == index:
return c
raise TypeError(f'Invalid index {index}')
def __len__(s):
return len(s.comparisons)
def add_to_source(s, *args, **kwargs):
for comparison in s:
comparison.add_to_source(*args, **kwargs)
class Comparison:
def __init__(s, datatable, group_property, control_group, target_group, comparator):
s.target_group = target_group
s.control_group = control_group
s.datatable = datatable
s.comparator = comparator
control_data = datatable.select(
observation_mask=datatable.observation_mask(group_property, filters.valid_id(exact=control_group))
).data
group_data = datatable.select(
observation_mask=datatable.observation_mask(group_property, filters.valid_id(exact=target_group))
).data
control_means = np.mean(control_data, axis=0)
group_means = np.mean(group_data, axis=0)
if comparator.fold_threshold is None:
s.folds = np.maximum(
group_means, comparator.intensity_threshold
) / np.maximum(
control_means, comparator.intensity_threshold
)
s.log2_folds = np.log2(s.folds)
else:
log_limit = np.log(2) * comparator.fold_threshold
pow_s = np.power(np.maximum(control_means, comparator.intensity_threshold), 2 / log_limit)
pow_e = np.power(np.maximum(group_means, comparator.intensity_threshold), 2 / log_limit)
s.log2_folds = log_limit * (pow_e - pow_s) / (pow_e + pow_s)
s.folds = np.power(2, s.log2_folds)
with np.errstate(invalid='ignore'):
s.tstats, s.pvalues = st.ttest_ind(control_data, group_data, axis=0, equal_var=False)
s.log10_pvalues = - np.log10(s.pvalues)
for i in range(len(s.pvalues)):
if np.isnan(s.pvalues[i]):
s.pvalues[i] = 1.0
s.scores = np.sqrt(
np.power(s.log2_folds / comparator.min_log_fold, 2) +
np.power(np.log2(s.pvalues) / np.log2(comparator.min_pvalue), 2)
)
s.selection = np.logical_and(
np.abs(s.log2_folds) > comparator.min_log_fold,
s.pvalues < comparator.min_pvalue
)
def add_to_source(s, key_prefix, format_string='{}: {} vs {} ({})'):
s.datatable.map_variable_metadata(
new_key=format_string.format(key_prefix, s.target_group, s.control_group, 'p-Value'),
mapping_function=lambda i: s.pvalues[i]
)
s.datatable.map_variable_metadata(
new_key=format_string.format(key_prefix, s.target_group, s.control_group, 'Log2-Fold'),
mapping_function=lambda i: s.log2_folds[i]
)
s.datatable.map_variable_metadata(
new_key=format_string.format(key_prefix, s.target_group, s.control_group, 'Fold'),
mapping_function=lambda i: s.folds[i]
)
s.datatable.map_variable_metadata(
new_key=format_string.format(key_prefix, s.target_group, s.control_group, 'Score'),
mapping_function=lambda i: s.scores[i]
)
def plot(s, interactive=True, **kwargs):
if interactive:
plot_interactive(s, **kwargs)
else:
plot_static(s, **kwargs)
\ No newline at end of file
import numpy as np
def plot_static(s, view_pvalue, view_log_fold, **figure_kwargs):
import matplotlib.pyplot as pl
selection_pvalues = s.pvalues[s.selection]
selection_log_folds = s.log2_folds[s.selection]
comparator = s.comparator
pl.figure(**figure_kwargs)
pl.plot(s.log2_folds, s.pvalues,
linestyle='none', marker='o', markersize=4, markerfacecolor=(1,1,1), markeredgecolor=(0,0,0))
pl.plot(selection_log_folds, selection_pvalues,
linestyle='none', marker='o', markersize=4, markerfacecolor=(0.5,0.5,0.5), markeredgecolor=(0,0,0))
pl.plot([-comparator.min_log_fold, -comparator.min_log_fold], [-1, 2],
linestyle='--', color=(0.8,0,0), linewidth=1)
pl.plot([+comparator.min_log_fold, +comparator.min_log_fold], [-1, 2],
linestyle='--', color=(0.8,0,0), linewidth=1)
pl.plot([-2*view_log_fold, 2*view_log_fold], [comparator.min_pvalue, comparator.min_pvalue],
linestyle='--', color=(0.8,0,0), linewidth=1)
pl.ylim([2, view_pvalue])
pl.xlim([-view_log_fold, +view_log_fold])
pl.yscale('log')
pl.ylabel('$p$ value')
pl.xlabel(r'$\log_2$ fold change')
pl.title(f'Volcano for {s.target_group} vs. {s.control_group}')
def plot_interactive(s, view_pvalue, view_log_fold, variable_name_property, backend='canvas', **bokeh_args):
import bokeh.models as bmd
import bokeh.plotting as bpt
import bokeh.io as bio
comparator = s.comparator
hover_tool = bmd.HoverTool(
tooltips=[
('p-Value', '@pvalues'),
('log2-fold', '@logfolds'),
('Variable', '@variables')
]
)
pan_tool = bmd.PanTool()
zoom_tool = bmd.WheelZoomTool()
fig = bpt.figure(
tools=[hover_tool, pan_tool, zoom_tool, bmd.SaveTool(), bmd.BoxZoomTool(), bmd.ResetTool()],
x_range=bmd.Range1d(-view_log_fold, +view_log_fold),
y_range=bmd.Range1d(2, view_pvalue), y_axis_type='log',
x_axis_label='log-2 fold change', y_axis_label='p-value',
output_backend=backend,
title=(
f'Volcano for {s.target_group} vs. {s.control_group}'
)
)
fig.toolbar.active_drag = pan_tool
fig.toolbar.active_inspect = hover_tool
fig.toolbar.active_scroll = zoom_tool
variable_names = s.datatable.variable_metadata[variable_name_property].values
variable_mask = s.datatable.variable_metadata[variable_name_property].mask
data_source = bmd.ColumnDataSource({
'pvalues': s.pvalues,
'logfolds': s.log2_folds,
'variables': variable_names,
'fill color': np.where(s.selection, '#444444', '#FFFFFF'),
'line color': np.where(variable_mask, '#FF0000', '#000000'),
'line size': np.where(variable_mask, 1.25, 0.5)
})
fig.circle(
source=data_source,
x='logfolds', y='pvalues',
fill_color='fill color', line_color='line color', fill_alpha=0.5,
size=8, line_width='line size'
)
for angle in [0, np.pi]:
fig.ray(
x=[-comparator.min_log_fold, +comparator.min_log_fold],
y=[2, 2], angle=(np.pi/2 + angle), length=None,
line_dash='dashed', line_width=1, line_color='#CC0000'
)
fig.ray(
x=[0], y=[comparator.min_pvalue],
angle=angle, length=None,
line_dash='dashed', line_width=1, line_color='#CC0000'
)
bio.reset_output()
bio.output_notebook()
bpt.show(fig, **bokeh_args)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment