Source code for itrails.int_load_trans_mat

import numpy as np
import pandas as pd
from importlib.resources import files


[docs] def load_trans_mat(n_seq): """ This functions returns a string matrix for the CTMC. Parameters ---------- n_seq : integer Number of sequences of the CTMC """ resource = files(__package__).joinpath(f"data/trans_mat_full_{n_seq}.csv") with resource.open("rb") as stream: df = pd.read_csv(stream) # Create dictionary with all CTMC state names and their index df_2 = { "names": pd.concat([df["from_str"], df["to_str"]]), "values": pd.concat([df["from"], df["to"]]), } # Convert names to data frame, drop duplicated states and sort by index df_2 = pd.DataFrame(data=df_2).drop_duplicates().sort_values(by=["values"]) # Pivot data frame to matrix, and fill missing values with '0' df_1 = ( df[["value", "from", "to"]] .pivot(index="from", columns="to", values="value") .fillna("0") ) # Reset column names df_1.columns.name = None df_1 = df_1.reset_index().iloc[:, 1:] return np.array(df_1), list(df_2["names"])
[docs] def trans_mat_num(trans_mat, coal, rho): """ This function returns a transition matrix given a string matrix whose values are either '0', or 'R' or 'C' preceeded by a number, corresponding to a multiplication factor for the recombination and the coalescence rate, respectively. The user can specify these two numerical rates. The function calculates the rates in the diagonals as (-1)*rowSums Parameters ---------- trans_mat : string numpy matrix Matrix containing coalescent and recombination rates in strings. coal : float Coalescent rate. rho : float Recombination rate """ # Define the number of rows and columns from string matrix shape num_rows, num_cols = trans_mat.shape # Define numeric matrix of that shape trans_mat_num = np.full((num_rows, num_cols), 0.0) # For each row for i in range(num_rows): # For each column for j in range(num_cols): # If the string matrix is '0' if trans_mat[i, j] == "0": # Add numeric 0 to numeric matrix trans_mat_num[i, j] = 0.0 # Otherwise else: # Multiply factor from string matrix to either coalescent or recombination rate trans_mat_num[i, j] = coal if trans_mat[i, j] == "C" else rho # Calculate diagonal as (-1)*rowSums for i in range(num_rows): trans_mat_num[i, i] = -sum(trans_mat_num[i]) return trans_mat_num