aegis_sim.submodels.genetics.modifying.gpm
1import logging 2import numpy as np 3from aegis_sim import parameterization 4from numba import njit, prange 5 6 7class GPM: 8 """Genotype-phenotype map 9 10 Order of elements in the vector does not matter. # TODO Explain better 11 12 ### GENOTYPE-PHENOTYPE MAP (GPM) ### 13 In AEGIS, every individual carries a genome which encodes an intrinsic phenotype. 14 A genome can be converted into an intrinsic phenotype using the genotype-phenotype map (GPM). 15 Conceptually, the GPM contains the information on how each site affects the intrinsic phenotype 16 of the individual (e.g. the first site decreases fertility by 0.15% at age class 28). 17 18 The GPM can be saved in two formats: a list or a matrix. 19 20 If it is a list, it will be a list of quadruple (4-tuple) with the following structure: `index`, `trait`, `age`, `magnitude`. 21 Thus a single quadruple encodes an effect of a single site at the index `index` (e.g. 1) 22 on the trait `trait` (e.g. fertility) expressed at the age `age` (e.g. 28). The change to the trait is of magnitude `magnitude` (0.85). 23 When a site is pleiotropic, there will be multiple quadruples with the same `index`. 24 We distringuish between age-pleiotropy (a single site affecting at least one trait at multiple ages) and trait-pleiotropy (a single site affecting multiple traits). 25 26 If the GPM is encoded as a matrix, it is a 3D matrix where dimensions encode `index`, `trait` and `age`, 27 while the matrix values encode the `magnitude`s. 28 29 When most sites are age-pleiotropic and trait-pleiotropic, the optimal encoding format is a matrix. 30 When most sites have age-specific and trait-specific effects, the optimal encoding format is a list 31 rather than a matrix because the matrix will be very sparse (it will carry a lot of 0's). 32 """ 33 34 def __init__(self, phenomatrix, phenolist): 35 self.phenomatrix = phenomatrix 36 self.phenolist = phenolist 37 38 self.dummy = len(self.phenolist) == 0 and self.phenomatrix is None 39 if self.dummy: 40 logging.info("Phenomap inactive.") 41 42 # Pre-resolved arrays for phenodiff_accelerated (cached to avoid 43 # recomputing every call). Populated lazily on first use because 44 # parameterization.traits may not be ready at __init__ time. 45 self._resolved = False 46 self._vec_indices = None 47 self._phenotype_indices = None 48 self._magnitudes = None 49 50 def _resolve_phenolist(self): 51 """Pre-resolve phenolist into numpy arrays (once).""" 52 if self._resolved: 53 return 54 vec_indices, traits, ages, magnitudes = zip(*self.phenolist) 55 self._vec_indices = np.array(vec_indices, dtype=np.int64) 56 self._magnitudes = np.array(magnitudes, dtype=np.float64) 57 self._phenotype_indices = np.array( 58 [parameterization.traits[trait].start + age for trait, age in zip(traits, ages)], 59 dtype=np.int64, 60 ) 61 self._resolved = True 62 63 def phenodiff(self, vectors, zeropheno): 64 """ 65 vectors .. haploidized genomes of all individuals; shape is (n_individuals, ?) 66 phenomatrix .. 67 phenolist .. list of (bit_index, trait, age, magnitude) 68 zeropheno .. phenotypes of all-zero genomes (i.e. how phenotypes would be if all bits were 0) 69 """ 70 71 if self.phenomatrix is not None: 72 # TODO BUG resolve phenomatrix 73 return vectors.dot(self.phenomatrix) 74 75 elif self.phenolist is not None: 76 phenodiff = zeropheno.copy() 77 for vec_index, trait, age, magnitude in self.phenolist: 78 vec_state = vectors[:, vec_index] 79 phenotype_change = vec_state * magnitude 80 phenotype_index = parameterization.traits[trait].start + age 81 phenodiff[:, phenotype_index] += phenotype_change 82 return phenodiff 83 84 else: 85 raise Exception("Neither phenomatrix nor phenolist has been provided.") 86 87 def phenodiff_accelerated(self, vectors, zeropheno): 88 if self.phenomatrix is not None: 89 return vectors.dot(self.phenomatrix) 90 91 elif self.phenolist is not None: 92 self._resolve_phenolist() 93 phenodiff = zeropheno.copy() 94 phenodiff = apply_phenolist_numba( 95 phenodiff, vectors, self._vec_indices, self._phenotype_indices, self._magnitudes, 96 ) 97 return phenodiff 98 99 else: 100 raise Exception("Neither phenomatrix nor phenolist has been provided.") 101 102 def __call__(self, interpretome, zeropheno): 103 if self.dummy: 104 return zeropheno 105 else: 106 # phenodiff_old = self.phenodiff(vectors=interpretome, zeropheno=zeropheno) 107 phenodiff = self.phenodiff_accelerated(vectors=interpretome, zeropheno=zeropheno) 108 return phenodiff 109 110 111@njit(parallel=True) 112def apply_phenolist_numba(phenodiff, vectors, vec_indices, phenotype_indices, magnitudes): 113 """Apply phenolist effects to phenodiff, parallelized over individuals. 114 115 Instead of pre-gathering vec_states = vectors[:, vec_indices] (which allocates 116 a large temporary array), this kernel indexes directly into vectors. 117 The outer loop is over individuals (prange) so each thread writes to its own 118 row of phenodiff — no race conditions. 119 120 Args: 121 phenodiff: (n_individuals, n_phenotype_cols) output array, modified in place 122 vectors: (n_individuals, genome_size) haploidized genome values 123 vec_indices: (n_phenolist,) genome column index for each phenolist entry 124 phenotype_indices: (n_phenolist,) phenotype column index for each entry 125 magnitudes: (n_phenolist,) effect size for each entry 126 """ 127 n_individuals = phenodiff.shape[0] 128 n_phenolist = vec_indices.shape[0] 129 for j in prange(n_individuals): 130 for i in range(n_phenolist): 131 phenodiff[j, phenotype_indices[i]] += vectors[j, vec_indices[i]] * magnitudes[i] 132 return phenodiff
8class GPM: 9 """Genotype-phenotype map 10 11 Order of elements in the vector does not matter. # TODO Explain better 12 13 ### GENOTYPE-PHENOTYPE MAP (GPM) ### 14 In AEGIS, every individual carries a genome which encodes an intrinsic phenotype. 15 A genome can be converted into an intrinsic phenotype using the genotype-phenotype map (GPM). 16 Conceptually, the GPM contains the information on how each site affects the intrinsic phenotype 17 of the individual (e.g. the first site decreases fertility by 0.15% at age class 28). 18 19 The GPM can be saved in two formats: a list or a matrix. 20 21 If it is a list, it will be a list of quadruple (4-tuple) with the following structure: `index`, `trait`, `age`, `magnitude`. 22 Thus a single quadruple encodes an effect of a single site at the index `index` (e.g. 1) 23 on the trait `trait` (e.g. fertility) expressed at the age `age` (e.g. 28). The change to the trait is of magnitude `magnitude` (0.85). 24 When a site is pleiotropic, there will be multiple quadruples with the same `index`. 25 We distringuish between age-pleiotropy (a single site affecting at least one trait at multiple ages) and trait-pleiotropy (a single site affecting multiple traits). 26 27 If the GPM is encoded as a matrix, it is a 3D matrix where dimensions encode `index`, `trait` and `age`, 28 while the matrix values encode the `magnitude`s. 29 30 When most sites are age-pleiotropic and trait-pleiotropic, the optimal encoding format is a matrix. 31 When most sites have age-specific and trait-specific effects, the optimal encoding format is a list 32 rather than a matrix because the matrix will be very sparse (it will carry a lot of 0's). 33 """ 34 35 def __init__(self, phenomatrix, phenolist): 36 self.phenomatrix = phenomatrix 37 self.phenolist = phenolist 38 39 self.dummy = len(self.phenolist) == 0 and self.phenomatrix is None 40 if self.dummy: 41 logging.info("Phenomap inactive.") 42 43 # Pre-resolved arrays for phenodiff_accelerated (cached to avoid 44 # recomputing every call). Populated lazily on first use because 45 # parameterization.traits may not be ready at __init__ time. 46 self._resolved = False 47 self._vec_indices = None 48 self._phenotype_indices = None 49 self._magnitudes = None 50 51 def _resolve_phenolist(self): 52 """Pre-resolve phenolist into numpy arrays (once).""" 53 if self._resolved: 54 return 55 vec_indices, traits, ages, magnitudes = zip(*self.phenolist) 56 self._vec_indices = np.array(vec_indices, dtype=np.int64) 57 self._magnitudes = np.array(magnitudes, dtype=np.float64) 58 self._phenotype_indices = np.array( 59 [parameterization.traits[trait].start + age for trait, age in zip(traits, ages)], 60 dtype=np.int64, 61 ) 62 self._resolved = True 63 64 def phenodiff(self, vectors, zeropheno): 65 """ 66 vectors .. haploidized genomes of all individuals; shape is (n_individuals, ?) 67 phenomatrix .. 68 phenolist .. list of (bit_index, trait, age, magnitude) 69 zeropheno .. phenotypes of all-zero genomes (i.e. how phenotypes would be if all bits were 0) 70 """ 71 72 if self.phenomatrix is not None: 73 # TODO BUG resolve phenomatrix 74 return vectors.dot(self.phenomatrix) 75 76 elif self.phenolist is not None: 77 phenodiff = zeropheno.copy() 78 for vec_index, trait, age, magnitude in self.phenolist: 79 vec_state = vectors[:, vec_index] 80 phenotype_change = vec_state * magnitude 81 phenotype_index = parameterization.traits[trait].start + age 82 phenodiff[:, phenotype_index] += phenotype_change 83 return phenodiff 84 85 else: 86 raise Exception("Neither phenomatrix nor phenolist has been provided.") 87 88 def phenodiff_accelerated(self, vectors, zeropheno): 89 if self.phenomatrix is not None: 90 return vectors.dot(self.phenomatrix) 91 92 elif self.phenolist is not None: 93 self._resolve_phenolist() 94 phenodiff = zeropheno.copy() 95 phenodiff = apply_phenolist_numba( 96 phenodiff, vectors, self._vec_indices, self._phenotype_indices, self._magnitudes, 97 ) 98 return phenodiff 99 100 else: 101 raise Exception("Neither phenomatrix nor phenolist has been provided.") 102 103 def __call__(self, interpretome, zeropheno): 104 if self.dummy: 105 return zeropheno 106 else: 107 # phenodiff_old = self.phenodiff(vectors=interpretome, zeropheno=zeropheno) 108 phenodiff = self.phenodiff_accelerated(vectors=interpretome, zeropheno=zeropheno) 109 return phenodiff
Genotype-phenotype map
Order of elements in the vector does not matter. # TODO Explain better
GENOTYPE-PHENOTYPE MAP (GPM)
In AEGIS, every individual carries a genome which encodes an intrinsic phenotype. A genome can be converted into an intrinsic phenotype using the genotype-phenotype map (GPM). Conceptually, the GPM contains the information on how each site affects the intrinsic phenotype of the individual (e.g. the first site decreases fertility by 0.15% at age class 28).
The GPM can be saved in two formats: a list or a matrix.
If it is a list, it will be a list of quadruple (4-tuple) with the following structure: index, trait, age, magnitude.
Thus a single quadruple encodes an effect of a single site at the index index (e.g. 1)
on the trait trait (e.g. fertility) expressed at the age age (e.g. 28). The change to the trait is of magnitude magnitude (0.85).
When a site is pleiotropic, there will be multiple quadruples with the same index.
We distringuish between age-pleiotropy (a single site affecting at least one trait at multiple ages) and trait-pleiotropy (a single site affecting multiple traits).
If the GPM is encoded as a matrix, it is a 3D matrix where dimensions encode index, trait and age,
while the matrix values encode the magnitudes.
When most sites are age-pleiotropic and trait-pleiotropic, the optimal encoding format is a matrix. When most sites have age-specific and trait-specific effects, the optimal encoding format is a list rather than a matrix because the matrix will be very sparse (it will carry a lot of 0's).
35 def __init__(self, phenomatrix, phenolist): 36 self.phenomatrix = phenomatrix 37 self.phenolist = phenolist 38 39 self.dummy = len(self.phenolist) == 0 and self.phenomatrix is None 40 if self.dummy: 41 logging.info("Phenomap inactive.") 42 43 # Pre-resolved arrays for phenodiff_accelerated (cached to avoid 44 # recomputing every call). Populated lazily on first use because 45 # parameterization.traits may not be ready at __init__ time. 46 self._resolved = False 47 self._vec_indices = None 48 self._phenotype_indices = None 49 self._magnitudes = None
64 def phenodiff(self, vectors, zeropheno): 65 """ 66 vectors .. haploidized genomes of all individuals; shape is (n_individuals, ?) 67 phenomatrix .. 68 phenolist .. list of (bit_index, trait, age, magnitude) 69 zeropheno .. phenotypes of all-zero genomes (i.e. how phenotypes would be if all bits were 0) 70 """ 71 72 if self.phenomatrix is not None: 73 # TODO BUG resolve phenomatrix 74 return vectors.dot(self.phenomatrix) 75 76 elif self.phenolist is not None: 77 phenodiff = zeropheno.copy() 78 for vec_index, trait, age, magnitude in self.phenolist: 79 vec_state = vectors[:, vec_index] 80 phenotype_change = vec_state * magnitude 81 phenotype_index = parameterization.traits[trait].start + age 82 phenodiff[:, phenotype_index] += phenotype_change 83 return phenodiff 84 85 else: 86 raise Exception("Neither phenomatrix nor phenolist has been provided.")
vectors .. haploidized genomes of all individuals; shape is (n_individuals, ?) phenomatrix .. phenolist .. list of (bit_index, trait, age, magnitude) zeropheno .. phenotypes of all-zero genomes (i.e. how phenotypes would be if all bits were 0)
88 def phenodiff_accelerated(self, vectors, zeropheno): 89 if self.phenomatrix is not None: 90 return vectors.dot(self.phenomatrix) 91 92 elif self.phenolist is not None: 93 self._resolve_phenolist() 94 phenodiff = zeropheno.copy() 95 phenodiff = apply_phenolist_numba( 96 phenodiff, vectors, self._vec_indices, self._phenotype_indices, self._magnitudes, 97 ) 98 return phenodiff 99 100 else: 101 raise Exception("Neither phenomatrix nor phenolist has been provided.")
112@njit(parallel=True) 113def apply_phenolist_numba(phenodiff, vectors, vec_indices, phenotype_indices, magnitudes): 114 """Apply phenolist effects to phenodiff, parallelized over individuals. 115 116 Instead of pre-gathering vec_states = vectors[:, vec_indices] (which allocates 117 a large temporary array), this kernel indexes directly into vectors. 118 The outer loop is over individuals (prange) so each thread writes to its own 119 row of phenodiff — no race conditions. 120 121 Args: 122 phenodiff: (n_individuals, n_phenotype_cols) output array, modified in place 123 vectors: (n_individuals, genome_size) haploidized genome values 124 vec_indices: (n_phenolist,) genome column index for each phenolist entry 125 phenotype_indices: (n_phenolist,) phenotype column index for each entry 126 magnitudes: (n_phenolist,) effect size for each entry 127 """ 128 n_individuals = phenodiff.shape[0] 129 n_phenolist = vec_indices.shape[0] 130 for j in prange(n_individuals): 131 for i in range(n_phenolist): 132 phenodiff[j, phenotype_indices[i]] += vectors[j, vec_indices[i]] * magnitudes[i] 133 return phenodiff
Apply phenolist effects to phenodiff, parallelized over individuals.
Instead of pre-gathering vec_states = vectors[:, vec_indices] (which allocates a large temporary array), this kernel indexes directly into vectors. The outer loop is over individuals (prange) so each thread writes to its own row of phenodiff — no race conditions.
Args: phenodiff: (n_individuals, n_phenotype_cols) output array, modified in place vectors: (n_individuals, genome_size) haploidized genome values vec_indices: (n_phenolist,) genome column index for each phenolist entry phenotype_indices: (n_phenolist,) phenotype column index for each entry magnitudes: (n_phenolist,) effect size for each entry