# Source code for finches.frontend.calvados_frontend

from finches.frontend.frontend_base import FinchesFrontend

# for model construction
from finches.forcefields.calvados import calvados_model
from finches import epsilon_calculation

# needed so we preserve docstrings after decorator is applied...
from functools import wraps


##
## This is a decorator that checks for RNA in the input sequences and throws an
## exception if it is found. NOTE the @wraps(func) decorator is needed to preserve
## the docstring of the function being decorated.
##
def RNA_check(func):
    """
    Decorator that rejects RNA sequences before calling a frontend method.

    The decorated callable is expected to be a method whose first two
    parameters after ``self`` are named ``seq1`` and ``seq2``. Each
    sequence is looked up by keyword first and then by position, so the
    check works regardless of how the caller supplies the sequences.

    Parameters
    --------------
    func : callable
        Method with the signature ``func(self, seq1, seq2, ...)``.

    Returns
    --------------
    callable
        Wrapped method that preserves the name and docstring of ``func``.

    Raises
    --------------
    ValueError
        If either sequence contains the character 'U'.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):

        # args[0] is self, so seq1 and seq2 sit at positions 1 and 2 when
        # they are passed positionally
        for position, name in ((1, 'seq1'), (2, 'seq2')):
            if name in kwargs:
                sequence = kwargs[name]
            elif len(args) > position:
                sequence = args[position]
            else:
                # sequence not supplied; let the wrapped function raise
                # its own TypeError for the missing argument
                continue

            if 'U' in sequence:
                raise ValueError("CALVADOS2 cannot handle RNA ('U')")

        return func(*args, **kwargs)
    return wrapper


class CALVADOS_frontend(FinchesFrontend):
    """
    Frontend exposing the CALVADOS2 forcefield through the FinchesFrontend
    interface.

    On construction the object builds a CALVADOS2 model and an
    InteractionMatrixConstructor around it. The sequence-taking methods
    are guarded by the RNA_check decorator, which raises a ValueError if
    either input sequence contains 'U'.
    """

    # ....................................................................................
    #
    #
    def __init__(self, salt=0.150, pH=7.4, temp=288):
        """
        Build the CALVADOS2 model and its interaction matrix constructor.

        Parameters
        --------------
        salt : float
            Salt value passed to calvados_model. Default is 0.150.
            (Presumably a molar concentration - confirm against
            calvados_model.)

        pH : float
            pH value passed to calvados_model. Default is 7.4.

        temp : float
            Temperature value passed to calvados_model. Default is 288.
            (Presumably in Kelvin - confirm against calvados_model.)
        """

        # call superclass constructor
        super().__init__()

        # initialize the CALVADOS forcefield object
        self.model = calvados_model('CALVADOS2', salt=salt, pH=pH, temp=temp)

        # build an interaction matrix constructor object
        self.IMC_object = epsilon_calculation.InteractionMatrixConstructor(self.model)

    # decorator checks for RNA in CALVADOS input
    # ....................................................................................
    #
    #
    @RNA_check
    def intermolecular_idr_matrix(self,
                                  seq1,
                                  seq2,
                                  window_size=31,
                                  use_cython=True,
                                  use_aliphatic_weighting=True,
                                  use_charge_weighting=True,
                                  disorder_1=True,
                                  disorder_2=True,
                                  null_shuffle=False):
        """
        Returns the interaction matrix for the two sequences. Specifically
        this involves decomposing the two sequences into window_size
        fragments and calculating the inter-fragment epsilon values using
        a sliding window approach.

        Note that we don't pad the sequence here, so the edges of the
        matrix start and end at indices that depend on the window size.
        To avoid confusion, the function also returns the indices for
        sequence1 and sequence2.

        Parameters
        --------------
        seq1 : str
            Input sequence 1

        seq2 : str
            Input sequence 2

        window_size : int
            The window size to use for the interaction matrix
            calculation. Default is 31.

        use_cython : bool
            Whether to use the cython implementation of the interaction
            matrix calculation. Default is True.

        use_aliphatic_weighting : bool
            Whether to use the aliphatic weighting scheme for the
            interaction matrix calculation. This weights local aliphatic
            residues based on the number of aliphatic residues adjacent
            to them. Default is True.

        use_charge_weighting : bool
            Whether to use the charge weighting scheme for the
            interaction matrix. This weights local charged residues based
            on the number of charged residues adjacent to them. Default
            is True.

        disorder_1 : bool
            Whether to generate the disorder profile for sequence 1.
            Default is True. If False, a uniform disorder profile is used
            (all values=1).

        disorder_2 : bool
            Whether to generate the disorder profile for sequence 2.
            Default is True. If False, a uniform disorder profile is used
            (all values=1).

        null_shuffle : bool or int
            Whether to shuffle the sequence before calculating the
            interaction matrix. Default is False. If set to a number
            defines the number of shuffles used for each sequence;
            recommended to use 100 shuffles.

        Returns
        --------------
        tuple
            A tuple containing the interaction matrix, disorder profile
            for sequence 1, and disorder profile for sequence 2.

            [0] : This is the interaction matrix, and is itself a tuple
                  of 3 elements. The first is the matrix of sliding
                  epsilon values, and the second and third are the
                  indices that map sequence position from sequence1 and
                  sequence2 to the matrix

            [1] : disorder profile for sequence 1. Will be all 1s if
                  disorder_1 is False

            [2] : disorder profile for sequence 2. Will be all 1s if
                  disorder_2 is False

        Raises
        --------------
        ValueError
            If either sequence contains 'U' (raised by RNA_check).
        """

        # call the superclass function
        return super().intermolecular_idr_matrix(seq1,
                                                 seq2,
                                                 window_size=window_size,
                                                 use_cython=use_cython,
                                                 use_aliphatic_weighting=use_aliphatic_weighting,
                                                 use_charge_weighting=use_charge_weighting,
                                                 disorder_1=disorder_1,
                                                 disorder_2=disorder_2,
                                                 null_shuffle=null_shuffle)

    # ....................................................................................
    #
    #
    @RNA_check
    def epsilon(self,
                seq1,
                seq2,
                use_aliphatic_weighting=True,
                use_charge_weighting=True):
        """
        Returns the epsilon value associated with the two sequences.
        Note that CALVADOS does not currently support RNA.

        Parameters
        --------------
        seq1 : str
            Input sequence 1

        seq2 : str
            Input sequence 2

        use_aliphatic_weighting : bool
            Whether to use the aliphatic weighting scheme for the
            interaction matrix calculation. This weights local aliphatic
            residues based on the number of aliphatic residues adjacent
            to them. Default is True.

        use_charge_weighting : bool
            Whether to use the charge weighting scheme for the
            interaction matrix. This weights local charged residues based
            on the number of charged residues adjacent to them. Default
            is True.

        Returns
        --------------
        float
            The epsilon value for the two sequences.

        Raises
        --------------
        ValueError
            If either sequence contains 'U' (raised by RNA_check).
        """

        return self.IMC_object.calculate_epsilon_value(seq1,
                                                       seq2,
                                                       use_aliphatic_weighting=use_aliphatic_weighting,
                                                       use_charge_weighting=use_charge_weighting)

    # ....................................................................................
    #
    #
    @RNA_check
    def interaction_figure(self,
                           seq1,
                           seq2,
                           window_size=31,
                           use_cython=True,
                           use_aliphatic_weighting=True,
                           use_charge_weighting=True,
                           tic_frequency=100,
                           seq1_domains=[],
                           seq2_domains=[],
                           seq1_lines=[],
                           seq2_lines=[],
                           vmin=-7.5,
                           vmax=7.5,
                           cmap='PRGn',
                           fname=None,
                           zero_folded=True,
                           no_disorder=False,
                           null_shuffle=False,
                           plot_rectangles=None,
                           disorder_1=True,
                           disorder_2=True):
        """
        Function to generate an interaction matrix figure between two
        sequences. This does all the calculation on the backend and
        formats a figure with parallel disorder tracks alongside the
        interaction matrix.

        Parameters
        --------------
        seq1 : str
            Input sequence 1

        seq2 : str
            Input sequence 2

        window_size : int
            Size of the window to use for the interaction matrix
            calculation. Note this must be an odd number and will be
            converted to an odd number if it is not. Default is 31.

        use_cython : bool
            Whether to use the cython implementation of the interaction
            matrix (always use this if you can). Default is True.

        use_aliphatic_weighting : bool
            Whether to use the aliphatic weighting scheme for the
            interaction matrix calculation. This weights local aliphatic
            residues based on the number of aliphatic residues adjacent
            to them. Default is True.

        use_charge_weighting : bool
            Whether to use the charge weighting scheme for the
            interaction matrix. This weights local charged residues based
            on the number of charged residues adjacent to them. Default
            is True.

        tic_frequency : int
            Frequency of the TICs on the plot. Default is 100.

        seq1_domains : list
            List of tuples/lists containing the start and end positions
            of domains in sequence 1. This means these can be easily
            highlighted in the plot.

        seq2_domains : list
            List of tuples/lists containing the start and end positions
            of domains in sequence 2. This means these can be easily
            highlighted in the plot.

        seq1_lines : list
            List of values that will draw lines onto the plot along
            sequence 1.

        seq2_lines : list
            List of values that will draw lines onto the plot along
            sequence 2.

        vmin : float
            Minimum value for the interaction matrix color scale.
            Default is -7.5.

        vmax : float
            Maximum value for the interaction matrix color scale.
            Default is 7.5.

        cmap : str
            Colormap to use for the interaction matrix. Default is
            'PRGn'.

        fname : str
            Filename to save the figure to. If None, the figure will be
            displayed.

        zero_folded : bool
            Passed through unchanged to the superclass implementation.
            Default is True. (Presumably controls whether folded regions
            are zeroed out - confirm against FinchesFrontend.)

        no_disorder : bool
            Whether to include the disorder profiles. Default is False.
            If True, the disorder profiles will not be included.

        null_shuffle : bool or int
            Whether to shuffle the sequence before calculating the
            interaction matrix. Default is False. If set to a number
            defines the number of shuffles used for each sequence;
            recommended to use 100 shuffles.

        plot_rectangles : list
            If a list is provided it should be a list of lists, where
            each sublist has the following information
            [seq1_start, seq1_end, seq2_start, seq2_end, color, alpha, kwargs].
            Based on this information, rectangles will be drawn on the
            plot to highlight specific regions. Default is None.

        disorder_1 : bool
            Whether to include the disorder profile for sequence 1.
            Default is True.

        disorder_2 : bool
            Whether to include the disorder profile for sequence 2.
            Default is True.

        Returns
        --------------
        tuple
            A tuple containing the figure and the axes objects for the
            main plot, the top disorder plot, the right disorder plot and
            the colorbar.

            fig         : matplotlib.figure.Figure (from plt.figure())
            im          : matplotlib.image.AxesImage (from plt.imshow())
            ax_main     : matplotlib.axes.Axes (from plt.subplot2grid())
            ax_top      : matplotlib.axes.Axes (from plt.subplot2grid())
            ax_right    : matplotlib.axes.Axes (from plt.subplot2grid())
            ax_colorbar : matplotlib.axes.Axes (from plt.subplot2grid())

        Raises
        --------------
        ValueError
            If either sequence contains 'U' (raised by RNA_check).
        """

        # NOTE: the list defaults above are kept for interface
        # compatibility; this method never mutates them and only forwards
        # them to the superclass.

        # call the superclass function
        return super().interaction_figure(seq1,
                                          seq2,
                                          window_size=window_size,
                                          use_cython=use_cython,
                                          use_aliphatic_weighting=use_aliphatic_weighting,
                                          use_charge_weighting=use_charge_weighting,
                                          tic_frequency=tic_frequency,
                                          seq1_domains=seq1_domains,
                                          seq2_domains=seq2_domains,
                                          seq1_lines=seq1_lines,
                                          seq2_lines=seq2_lines,
                                          vmin=vmin,
                                          vmax=vmax,
                                          cmap=cmap,
                                          fname=fname,
                                          zero_folded=zero_folded,
                                          disorder_1=disorder_1,
                                          disorder_2=disorder_2,
                                          no_disorder=no_disorder,
                                          null_shuffle=null_shuffle,
                                          plot_rectangles=plot_rectangles)

    # ....................................................................................
    #
    #
    def protein_nucleic_vector(self, seq, fragsize=31, smoothing_window=30, poly_order=3):
        """
        Stub function to calculate the protein-nucleic acid interaction
        vector. CALVADOS does not currently support RNA, so this always
        raises.

        Parameters
        --------------
        seq : str
            Input sequence (unused).

        fragsize : int
            Unused. Default is 31.

        smoothing_window : int
            Unused. Default is 30.

        poly_order : int
            Unused. Default is 3.

        Raises
        --------------
        Exception
            Always, because CALVADOS cannot currently handle RNA.
        """
        raise Exception('CALVADOS cannot currently handle RNA')