aegis_sim.submodels.genetics.modifying.gpm
import logging
import numpy as np
from aegis_sim import parameterization
from numba import njit


class GPM:
    """Genotype-phenotype map

    Order of elements in the vector does not matter. # TODO Explain better

    ### GENOTYPE-PHENOTYPE MAP (GPM) ###
    In AEGIS, every individual carries a genome which encodes an intrinsic phenotype.
    A genome can be converted into an intrinsic phenotype using the genotype-phenotype map (GPM).
    Conceptually, the GPM contains the information on how each site affects the intrinsic phenotype
    of the individual (e.g. the first site decreases fertility by 0.15% at age class 28).

    The GPM can be saved in two formats: a list or a matrix.

    If it is a list, it will be a list of quadruples (4-tuples) with the following structure: `index`, `trait`, `age`, `magnitude`.
    Thus a single quadruple encodes an effect of a single site at the index `index` (e.g. 1)
    on the trait `trait` (e.g. fertility) expressed at the age `age` (e.g. 28). The change to the trait is of magnitude `magnitude` (e.g. 0.85).
    When a site is pleiotropic, there will be multiple quadruples with the same `index`.
    We distinguish between age-pleiotropy (a single site affecting at least one trait at multiple ages) and trait-pleiotropy (a single site affecting multiple traits).

    If the GPM is encoded as a matrix, it is a 3D matrix where dimensions encode `index`, `trait` and `age`,
    while the matrix values encode the `magnitude`s.

    When most sites are age-pleiotropic and trait-pleiotropic, the optimal encoding format is a matrix.
    When most sites have age-specific and trait-specific effects, the optimal encoding format is a list
    rather than a matrix because the matrix will be very sparse (it will carry a lot of 0's).
    """

    def __init__(self, phenomatrix, phenolist):
        """
        phenomatrix .. matrix encoding of the map, or None
        phenolist .. list of (index, trait, age, magnitude) quadruples; may be empty or None
        """
        self.phenomatrix = phenomatrix
        self.phenolist = phenolist

        # A missing (None) phenolist is treated like an empty one; calling len() on None
        # would raise TypeError even when a phenomatrix has been supplied.
        no_phenolist = self.phenolist is None or len(self.phenolist) == 0
        self.dummy = no_phenolist and self.phenomatrix is None
        if self.dummy:
            logging.info("Phenomap inactive.")

    def phenodiff(self, vectors, zeropheno):
        """
        Reference (non-accelerated) computation of phenotypes from genomes.

        vectors .. haploidized genomes of all individuals; shape is (n_individuals, ?)
        zeropheno .. phenotypes of all-zero genomes (i.e. how phenotypes would be if all bits were 0)

        Uses self.phenomatrix when it is set (zeropheno is then not used); otherwise applies
        self.phenolist, a list of (bit_index, trait, age, magnitude), to a copy of zeropheno.
        Raises Exception when neither is available.
        """

        if self.phenomatrix is not None:
            # TODO BUG resolve phenomatrix
            return vectors.dot(self.phenomatrix)

        elif self.phenolist is not None:
            phenodiff = zeropheno.copy()
            for vec_index, trait, age, magnitude in self.phenolist:
                vec_state = vectors[:, vec_index]
                phenotype_change = vec_state * magnitude
                phenotype_index = parameterization.traits[trait].start + age
                phenodiff[:, phenotype_index] += phenotype_change
            return phenodiff

        else:
            raise Exception("Neither phenomatrix nor phenolist has been provided.")

    def phenodiff_accelerated(self, vectors, zeropheno):
        """
        Same computation as `phenodiff`, with the phenolist loop delegated to `apply_phenolist_numba`.

        vectors .. haploidized genomes of all individuals
        zeropheno .. phenotypes of all-zero genomes; copied, never modified

        Raises Exception when neither phenomatrix nor phenolist is available.
        """
        if self.phenomatrix is not None:
            return vectors.dot(self.phenomatrix)

        elif self.phenolist is not None:
            phenodiff = zeropheno.copy()
            if len(self.phenolist) == 0:
                # zip(*[]) yields nothing to unpack; an empty map changes no phenotype.
                return phenodiff
            vec_indices, traits, ages, magnitudes = zip(*self.phenolist)
            vec_indices = np.array(vec_indices)
            magnitudes = np.array(magnitudes)
            vec_states = vectors[:, vec_indices]
            phenotype_indices = np.array(
                [parameterization.traits[trait].start + age for trait, age in zip(traits, ages)]
            )
            phenodiff = apply_phenolist_numba(phenodiff, vec_states, phenotype_indices, magnitudes)
            return phenodiff

        else:
            raise Exception("Neither phenomatrix nor phenolist has been provided.")

    def __call__(self, interpretome, zeropheno):
        """Return zeropheno itself when the map is inactive, otherwise the accelerated phenodiff."""
        if self.dummy:
            return zeropheno
        else:
            # `phenodiff` computes the same result without numba and is kept as a reference.
            phenodiff = self.phenodiff_accelerated(vectors=interpretome, zeropheno=zeropheno)
            return phenodiff


@njit
def apply_phenolist_numba(phenodiff, vec_states, phenotype_indices, magnitudes):
    """
    Add the effect of every phenolist entry to `phenodiff`, which is updated in place and returned.

    phenodiff .. phenotypes to be modified; shape is (individual, phenotypic site)
    vec_states .. state of each genomic site for each individual; shape is (individual, genomic site)
    phenotype_indices .. position in the phenotype vector that is modified by the genomic site; shape is (genomic site)
    magnitudes .. effect size for each site; shape is (genomic site)
    """
    n_individuals, n_sites = vec_states.shape
    for i in range(n_sites):
        for j in range(n_individuals):
            phenodiff[j, phenotype_indices[i]] += vec_states[j, i] * magnitudes[i]
    return phenodiff
class GPM:
    """Genotype-phenotype map

    Order of elements in the vector does not matter. # TODO Explain better

    ### GENOTYPE-PHENOTYPE MAP (GPM) ###
    In AEGIS, every individual carries a genome which encodes an intrinsic phenotype.
    A genome can be converted into an intrinsic phenotype using the genotype-phenotype map (GPM).
    Conceptually, the GPM contains the information on how each site affects the intrinsic phenotype
    of the individual (e.g. the first site decreases fertility by 0.15% at age class 28).

    The GPM can be saved in two formats: a list or a matrix.

    If it is a list, it will be a list of quadruples (4-tuples) with the following structure: `index`, `trait`, `age`, `magnitude`.
    Thus a single quadruple encodes an effect of a single site at the index `index` (e.g. 1)
    on the trait `trait` (e.g. fertility) expressed at the age `age` (e.g. 28). The change to the trait is of magnitude `magnitude` (e.g. 0.85).
    When a site is pleiotropic, there will be multiple quadruples with the same `index`.
    We distinguish between age-pleiotropy (a single site affecting at least one trait at multiple ages) and trait-pleiotropy (a single site affecting multiple traits).

    If the GPM is encoded as a matrix, it is a 3D matrix where dimensions encode `index`, `trait` and `age`,
    while the matrix values encode the `magnitude`s.

    When most sites are age-pleiotropic and trait-pleiotropic, the optimal encoding format is a matrix.
    When most sites have age-specific and trait-specific effects, the optimal encoding format is a list
    rather than a matrix because the matrix will be very sparse (it will carry a lot of 0's).
    """

    def __init__(self, phenomatrix, phenolist):
        """
        phenomatrix .. matrix encoding of the map, or None
        phenolist .. list of (index, trait, age, magnitude) quadruples; may be empty or None
        """
        self.phenomatrix = phenomatrix
        self.phenolist = phenolist

        # A missing (None) phenolist is treated like an empty one; calling len() on None
        # would raise TypeError even when a phenomatrix has been supplied.
        no_phenolist = self.phenolist is None or len(self.phenolist) == 0
        self.dummy = no_phenolist and self.phenomatrix is None
        if self.dummy:
            logging.info("Phenomap inactive.")

    def phenodiff(self, vectors, zeropheno):
        """
        Reference (non-accelerated) computation of phenotypes from genomes.

        vectors .. haploidized genomes of all individuals; shape is (n_individuals, ?)
        zeropheno .. phenotypes of all-zero genomes (i.e. how phenotypes would be if all bits were 0)

        Uses self.phenomatrix when it is set (zeropheno is then not used); otherwise applies
        self.phenolist, a list of (bit_index, trait, age, magnitude), to a copy of zeropheno.
        Raises Exception when neither is available.
        """

        if self.phenomatrix is not None:
            # TODO BUG resolve phenomatrix
            return vectors.dot(self.phenomatrix)

        elif self.phenolist is not None:
            phenodiff = zeropheno.copy()
            for vec_index, trait, age, magnitude in self.phenolist:
                vec_state = vectors[:, vec_index]
                phenotype_change = vec_state * magnitude
                phenotype_index = parameterization.traits[trait].start + age
                phenodiff[:, phenotype_index] += phenotype_change
            return phenodiff

        else:
            raise Exception("Neither phenomatrix nor phenolist has been provided.")

    def phenodiff_accelerated(self, vectors, zeropheno):
        """
        Same computation as `phenodiff`, with the phenolist loop delegated to `apply_phenolist_numba`.

        vectors .. haploidized genomes of all individuals
        zeropheno .. phenotypes of all-zero genomes; copied, never modified

        Raises Exception when neither phenomatrix nor phenolist is available.
        """
        if self.phenomatrix is not None:
            return vectors.dot(self.phenomatrix)

        elif self.phenolist is not None:
            phenodiff = zeropheno.copy()
            if len(self.phenolist) == 0:
                # zip(*[]) yields nothing to unpack; an empty map changes no phenotype.
                return phenodiff
            vec_indices, traits, ages, magnitudes = zip(*self.phenolist)
            vec_indices = np.array(vec_indices)
            magnitudes = np.array(magnitudes)
            vec_states = vectors[:, vec_indices]
            phenotype_indices = np.array(
                [parameterization.traits[trait].start + age for trait, age in zip(traits, ages)]
            )
            phenodiff = apply_phenolist_numba(phenodiff, vec_states, phenotype_indices, magnitudes)
            return phenodiff

        else:
            raise Exception("Neither phenomatrix nor phenolist has been provided.")

    def __call__(self, interpretome, zeropheno):
        """Return zeropheno itself when the map is inactive, otherwise the accelerated phenodiff."""
        if self.dummy:
            return zeropheno
        else:
            # `phenodiff` computes the same result without numba and is kept as a reference.
            phenodiff = self.phenodiff_accelerated(vectors=interpretome, zeropheno=zeropheno)
            return phenodiff
Genotype-phenotype map
Order of elements in the vector does not matter. # TODO Explain better
GENOTYPE-PHENOTYPE MAP (GPM)
In AEGIS, every individual carries a genome which encodes an intrinsic phenotype. A genome can be converted into an intrinsic phenotype using the genotype-phenotype map (GPM). Conceptually, the GPM contains the information on how each site affects the intrinsic phenotype of the individual (e.g. the first site decreases fertility by 0.15% at age class 28).
The GPM can be saved in two formats: a list or a matrix.
If it is a list, it will be a list of quadruples (4-tuples) with the following structure: `index`, `trait`, `age`, `magnitude`.
Thus a single quadruple encodes an effect of a single site at the index `index` (e.g. 1) on the trait `trait` (e.g. fertility) expressed at the age `age` (e.g. 28). The change to the trait is of magnitude `magnitude` (e.g. 0.85).
When a site is pleiotropic, there will be multiple quadruples with the same `index`.
We distinguish between age-pleiotropy (a single site affecting at least one trait at multiple ages) and trait-pleiotropy (a single site affecting multiple traits).
If the GPM is encoded as a matrix, it is a 3D matrix where dimensions encode `index`, `trait` and `age`, while the matrix values encode the `magnitude`s.
When most sites are age-pleiotropic and trait-pleiotropic, the optimal encoding format is a matrix. When most sites have age-specific and trait-specific effects, the optimal encoding format is a list rather than a matrix because the matrix will be very sparse (it will carry a lot of 0's).
def phenodiff(self, vectors, zeropheno):
    """
    Reference (non-accelerated) computation of phenotypes from genomes.

    vectors .. haploidized genomes of all individuals; shape is (n_individuals, ?)
    zeropheno .. phenotypes of all-zero genomes (i.e. how phenotypes would be if all bits were 0)

    Uses self.phenomatrix when it is set (zeropheno is then not used); otherwise applies
    self.phenolist, a list of (bit_index, trait, age, magnitude), to a copy of zeropheno.
    Raises Exception when neither is available.
    """
    if self.phenomatrix is not None:
        # TODO BUG resolve phenomatrix
        return vectors.dot(self.phenomatrix)

    if self.phenolist is None:
        raise Exception("Neither phenomatrix nor phenolist has been provided.")

    # Work on a copy so the caller's zeropheno is left untouched.
    result = zeropheno.copy()
    for site, trait, age, magnitude in self.phenolist:
        change = vectors[:, site] * magnitude
        # Each trait occupies a contiguous slice of the phenotype vector; age offsets into it.
        column = parameterization.traits[trait].start + age
        result[:, column] += change
    return result
vectors .. haploidized genomes of all individuals; shape is (n_individuals, ?) phenomatrix .. phenolist .. list of (bit_index, trait, age, magnitude) zeropheno .. phenotypes of all-zero genomes (i.e. how phenotypes would be if all bits were 0)
def phenodiff_accelerated(self, vectors, zeropheno):
    """
    Compute phenotypes from genomes, delegating the phenolist loop to `apply_phenolist_numba`.

    vectors .. haploidized genomes of all individuals
    zeropheno .. phenotypes of all-zero genomes; copied, never modified

    Returns vectors.dot(self.phenomatrix) when a phenomatrix is set (zeropheno is then not used);
    otherwise a copy of zeropheno with every phenolist effect added.
    Raises Exception when neither phenomatrix nor phenolist is available.
    """
    if self.phenomatrix is not None:
        return vectors.dot(self.phenomatrix)

    elif self.phenolist is not None:
        phenodiff = zeropheno.copy()
        if len(self.phenolist) == 0:
            # zip(*[]) yields nothing to unpack; an empty map changes no phenotype.
            return phenodiff
        # Transpose the list of quadruples into one sequence per field.
        vec_indices, traits, ages, magnitudes = zip(*self.phenolist)
        vec_indices = np.array(vec_indices)
        magnitudes = np.array(magnitudes)
        # One column per phenolist entry (a site listed several times is repeated).
        vec_states = vectors[:, vec_indices]
        phenotype_indices = np.array(
            [parameterization.traits[trait].start + age for trait, age in zip(traits, ages)]
        )
        phenodiff = apply_phenolist_numba(phenodiff, vec_states, phenotype_indices, magnitudes)
        return phenodiff

    else:
        raise Exception("Neither phenomatrix nor phenolist has been provided.")
@njit
def apply_phenolist_numba(phenodiff, vec_states, phenotype_indices, magnitudes):
    """
    Add the effect of every phenolist entry to `phenodiff`, which is updated in place and returned.

    phenodiff .. phenotypes to be modified; shape is (individual, phenotypic site)
    vec_states .. state of each genomic site for each individual; shape is (individual, genomic site)
    phenotype_indices .. position in the phenotype vector that is modified by the genomic site; shape is (genomic site)
    magnitudes .. effect size for each site; shape is (genomic site)
    """
    n_rows, n_entries = vec_states.shape
    for row in range(n_rows):
        # Entries are visited in ascending order, so contributions that share a
        # phenotype index accumulate in the same order as the list.
        for entry in range(n_entries):
            target = phenotype_indices[entry]
            phenodiff[row, target] += vec_states[row, entry] * magnitudes[entry]
    return phenodiff
phenodiff .. modification to phenotype vector for a specific trait; shape is (individual, phenotypic site) vec_states .. state of each genomic site for each individual; shape is (individual, genomic site) phenotype_indices .. position in the phenotype vector that is modified by the genomic site; shape is (genomic site) magnitudes .. effect size for each site; shape is (genomic site)