aegis_sim.submodels.genetics.modifying.gpm

  1import logging
  2import numpy as np
  3from aegis_sim import parameterization
  4from numba import njit, prange
  5
  6
  7class GPM:
  8    """Genotype-phenotype map
  9
 10    Order of elements in the vector does not matter. # TODO Explain better
 11
 12    ### GENOTYPE-PHENOTYPE MAP (GPM) ###
 13    In AEGIS, every individual carries a genome which encodes an intrinsic phenotype.
 14    A genome can be converted into an intrinsic phenotype using the genotype-phenotype map (GPM).
 15    Conceptually, the GPM contains the information on how each site affects the intrinsic phenotype
 16    of the individual (e.g. the first site decreases fertility by 0.15% at age class 28).
 17
 18    The GPM can be saved in two formats: a list or a matrix.
 19
 20    If it is a list, it will be a list of quadruple (4-tuple) with the following structure: `index`, `trait`, `age`, `magnitude`.
 21    Thus a single quadruple encodes an effect of a single site at the index `index` (e.g. 1)
 22    on the trait `trait` (e.g. fertility) expressed at the age `age` (e.g. 28). The change to the trait is of magnitude `magnitude` (0.85).
 23    When a site is pleiotropic, there will be multiple quadruples with the same `index`.
 24    We distringuish between age-pleiotropy (a single site affecting at least one trait at multiple ages) and trait-pleiotropy (a single site affecting multiple traits).
 25
 26    If the GPM is encoded as a matrix, it is a 3D matrix where dimensions encode `index`, `trait` and `age`,
 27    while the matrix values encode the `magnitude`s.
 28
 29    When most sites are age-pleiotropic and trait-pleiotropic, the optimal encoding format is a matrix.
 30    When most sites have age-specific and trait-specific effects, the optimal encoding format is a list
 31    rather than a matrix because the matrix will be very sparse (it will carry a lot of 0's).
 32    """
 33
 34    def __init__(self, phenomatrix, phenolist):
 35        self.phenomatrix = phenomatrix
 36        self.phenolist = phenolist
 37
 38        self.dummy = len(self.phenolist) == 0 and self.phenomatrix is None
 39        if self.dummy:
 40            logging.info("Phenomap inactive.")
 41
 42        # Pre-resolved arrays for phenodiff_accelerated (cached to avoid
 43        # recomputing every call). Populated lazily on first use because
 44        # parameterization.traits may not be ready at __init__ time.
 45        self._resolved = False
 46        self._vec_indices = None
 47        self._phenotype_indices = None
 48        self._magnitudes = None
 49
 50    def _resolve_phenolist(self):
 51        """Pre-resolve phenolist into numpy arrays (once)."""
 52        if self._resolved:
 53            return
 54        vec_indices, traits, ages, magnitudes = zip(*self.phenolist)
 55        self._vec_indices = np.array(vec_indices, dtype=np.int64)
 56        self._magnitudes = np.array(magnitudes, dtype=np.float64)
 57        self._phenotype_indices = np.array(
 58            [parameterization.traits[trait].start + age for trait, age in zip(traits, ages)],
 59            dtype=np.int64,
 60        )
 61        self._resolved = True
 62
 63    def phenodiff(self, vectors, zeropheno):
 64        """
 65        vectors .. haploidized genomes of all individuals; shape is (n_individuals, ?)
 66        phenomatrix ..
 67        phenolist .. list of (bit_index, trait, age, magnitude)
 68        zeropheno .. phenotypes of all-zero genomes (i.e. how phenotypes would be if all bits were 0)
 69        """
 70
 71        if self.phenomatrix is not None:
 72            # TODO BUG resolve phenomatrix
 73            return vectors.dot(self.phenomatrix)
 74
 75        elif self.phenolist is not None:
 76            phenodiff = zeropheno.copy()
 77            for vec_index, trait, age, magnitude in self.phenolist:
 78                vec_state = vectors[:, vec_index]
 79                phenotype_change = vec_state * magnitude
 80                phenotype_index = parameterization.traits[trait].start + age
 81                phenodiff[:, phenotype_index] += phenotype_change
 82            return phenodiff
 83
 84        else:
 85            raise Exception("Neither phenomatrix nor phenolist has been provided.")
 86
 87    def phenodiff_accelerated(self, vectors, zeropheno):
 88        if self.phenomatrix is not None:
 89            return vectors.dot(self.phenomatrix)
 90
 91        elif self.phenolist is not None:
 92            self._resolve_phenolist()
 93            phenodiff = zeropheno.copy()
 94            phenodiff = apply_phenolist_numba(
 95                phenodiff, vectors, self._vec_indices, self._phenotype_indices, self._magnitudes,
 96            )
 97            return phenodiff
 98
 99        else:
100            raise Exception("Neither phenomatrix nor phenolist has been provided.")
101
102    def __call__(self, interpretome, zeropheno):
103        if self.dummy:
104            return zeropheno
105        else:
106            # phenodiff_old = self.phenodiff(vectors=interpretome, zeropheno=zeropheno)
107            phenodiff = self.phenodiff_accelerated(vectors=interpretome, zeropheno=zeropheno)
108            return phenodiff
109
110
111@njit(parallel=True)
112def apply_phenolist_numba(phenodiff, vectors, vec_indices, phenotype_indices, magnitudes):
113    """Apply phenolist effects to phenodiff, parallelized over individuals.
114
115    Instead of pre-gathering vec_states = vectors[:, vec_indices] (which allocates
116    a large temporary array), this kernel indexes directly into vectors.
117    The outer loop is over individuals (prange) so each thread writes to its own
118    row of phenodiff — no race conditions.
119
120    Args:
121        phenodiff: (n_individuals, n_phenotype_cols) output array, modified in place
122        vectors: (n_individuals, genome_size) haploidized genome values
123        vec_indices: (n_phenolist,) genome column index for each phenolist entry
124        phenotype_indices: (n_phenolist,) phenotype column index for each entry
125        magnitudes: (n_phenolist,) effect size for each entry
126    """
127    n_individuals = phenodiff.shape[0]
128    n_phenolist = vec_indices.shape[0]
129    for j in prange(n_individuals):
130        for i in range(n_phenolist):
131            phenodiff[j, phenotype_indices[i]] += vectors[j, vec_indices[i]] * magnitudes[i]
132    return phenodiff
class GPM:
  8class GPM:
  9    """Genotype-phenotype map
 10
 11    Order of elements in the vector does not matter. # TODO Explain better
 12
 13    ### GENOTYPE-PHENOTYPE MAP (GPM) ###
 14    In AEGIS, every individual carries a genome which encodes an intrinsic phenotype.
 15    A genome can be converted into an intrinsic phenotype using the genotype-phenotype map (GPM).
 16    Conceptually, the GPM contains the information on how each site affects the intrinsic phenotype
 17    of the individual (e.g. the first site decreases fertility by 0.15% at age class 28).
 18
 19    The GPM can be saved in two formats: a list or a matrix.
 20
 21    If it is a list, it will be a list of quadruple (4-tuple) with the following structure: `index`, `trait`, `age`, `magnitude`.
 22    Thus a single quadruple encodes an effect of a single site at the index `index` (e.g. 1)
 23    on the trait `trait` (e.g. fertility) expressed at the age `age` (e.g. 28). The change to the trait is of magnitude `magnitude` (0.85).
 24    When a site is pleiotropic, there will be multiple quadruples with the same `index`.
 25    We distringuish between age-pleiotropy (a single site affecting at least one trait at multiple ages) and trait-pleiotropy (a single site affecting multiple traits).
 26
 27    If the GPM is encoded as a matrix, it is a 3D matrix where dimensions encode `index`, `trait` and `age`,
 28    while the matrix values encode the `magnitude`s.
 29
 30    When most sites are age-pleiotropic and trait-pleiotropic, the optimal encoding format is a matrix.
 31    When most sites have age-specific and trait-specific effects, the optimal encoding format is a list
 32    rather than a matrix because the matrix will be very sparse (it will carry a lot of 0's).
 33    """
 34
 35    def __init__(self, phenomatrix, phenolist):
 36        self.phenomatrix = phenomatrix
 37        self.phenolist = phenolist
 38
 39        self.dummy = len(self.phenolist) == 0 and self.phenomatrix is None
 40        if self.dummy:
 41            logging.info("Phenomap inactive.")
 42
 43        # Pre-resolved arrays for phenodiff_accelerated (cached to avoid
 44        # recomputing every call). Populated lazily on first use because
 45        # parameterization.traits may not be ready at __init__ time.
 46        self._resolved = False
 47        self._vec_indices = None
 48        self._phenotype_indices = None
 49        self._magnitudes = None
 50
 51    def _resolve_phenolist(self):
 52        """Pre-resolve phenolist into numpy arrays (once)."""
 53        if self._resolved:
 54            return
 55        vec_indices, traits, ages, magnitudes = zip(*self.phenolist)
 56        self._vec_indices = np.array(vec_indices, dtype=np.int64)
 57        self._magnitudes = np.array(magnitudes, dtype=np.float64)
 58        self._phenotype_indices = np.array(
 59            [parameterization.traits[trait].start + age for trait, age in zip(traits, ages)],
 60            dtype=np.int64,
 61        )
 62        self._resolved = True
 63
 64    def phenodiff(self, vectors, zeropheno):
 65        """
 66        vectors .. haploidized genomes of all individuals; shape is (n_individuals, ?)
 67        phenomatrix ..
 68        phenolist .. list of (bit_index, trait, age, magnitude)
 69        zeropheno .. phenotypes of all-zero genomes (i.e. how phenotypes would be if all bits were 0)
 70        """
 71
 72        if self.phenomatrix is not None:
 73            # TODO BUG resolve phenomatrix
 74            return vectors.dot(self.phenomatrix)
 75
 76        elif self.phenolist is not None:
 77            phenodiff = zeropheno.copy()
 78            for vec_index, trait, age, magnitude in self.phenolist:
 79                vec_state = vectors[:, vec_index]
 80                phenotype_change = vec_state * magnitude
 81                phenotype_index = parameterization.traits[trait].start + age
 82                phenodiff[:, phenotype_index] += phenotype_change
 83            return phenodiff
 84
 85        else:
 86            raise Exception("Neither phenomatrix nor phenolist has been provided.")
 87
 88    def phenodiff_accelerated(self, vectors, zeropheno):
 89        if self.phenomatrix is not None:
 90            return vectors.dot(self.phenomatrix)
 91
 92        elif self.phenolist is not None:
 93            self._resolve_phenolist()
 94            phenodiff = zeropheno.copy()
 95            phenodiff = apply_phenolist_numba(
 96                phenodiff, vectors, self._vec_indices, self._phenotype_indices, self._magnitudes,
 97            )
 98            return phenodiff
 99
100        else:
101            raise Exception("Neither phenomatrix nor phenolist has been provided.")
102
103    def __call__(self, interpretome, zeropheno):
104        if self.dummy:
105            return zeropheno
106        else:
107            # phenodiff_old = self.phenodiff(vectors=interpretome, zeropheno=zeropheno)
108            phenodiff = self.phenodiff_accelerated(vectors=interpretome, zeropheno=zeropheno)
109            return phenodiff

Genotype-phenotype map

Order of elements in the vector does not matter. # TODO Explain better

GENOTYPE-PHENOTYPE MAP (GPM)

In AEGIS, every individual carries a genome which encodes an intrinsic phenotype. A genome can be converted into an intrinsic phenotype using the genotype-phenotype map (GPM). Conceptually, the GPM contains the information on how each site affects the intrinsic phenotype of the individual (e.g. the first site decreases fertility by 0.15% at age class 28).

The GPM can be saved in two formats: a list or a matrix.

If it is a list, it will be a list of quadruple (4-tuple) with the following structure: index, trait, age, magnitude. Thus a single quadruple encodes an effect of a single site at the index index (e.g. 1) on the trait trait (e.g. fertility) expressed at the age age (e.g. 28). The change to the trait is of magnitude magnitude (0.85). When a site is pleiotropic, there will be multiple quadruples with the same index. We distringuish between age-pleiotropy (a single site affecting at least one trait at multiple ages) and trait-pleiotropy (a single site affecting multiple traits).

If the GPM is encoded as a matrix, it is a 3D matrix where dimensions encode index, trait and age, while the matrix values encode the magnitudes.

When most sites are age-pleiotropic and trait-pleiotropic, the optimal encoding format is a matrix. When most sites have age-specific and trait-specific effects, the optimal encoding format is a list rather than a matrix because the matrix will be very sparse (it will carry a lot of 0's).

GPM(phenomatrix, phenolist)
35    def __init__(self, phenomatrix, phenolist):
36        self.phenomatrix = phenomatrix
37        self.phenolist = phenolist
38
39        self.dummy = len(self.phenolist) == 0 and self.phenomatrix is None
40        if self.dummy:
41            logging.info("Phenomap inactive.")
42
43        # Pre-resolved arrays for phenodiff_accelerated (cached to avoid
44        # recomputing every call). Populated lazily on first use because
45        # parameterization.traits may not be ready at __init__ time.
46        self._resolved = False
47        self._vec_indices = None
48        self._phenotype_indices = None
49        self._magnitudes = None
phenomatrix
phenolist
dummy
def phenodiff(self, vectors, zeropheno):
64    def phenodiff(self, vectors, zeropheno):
65        """
66        vectors .. haploidized genomes of all individuals; shape is (n_individuals, ?)
67        phenomatrix ..
68        phenolist .. list of (bit_index, trait, age, magnitude)
69        zeropheno .. phenotypes of all-zero genomes (i.e. how phenotypes would be if all bits were 0)
70        """
71
72        if self.phenomatrix is not None:
73            # TODO BUG resolve phenomatrix
74            return vectors.dot(self.phenomatrix)
75
76        elif self.phenolist is not None:
77            phenodiff = zeropheno.copy()
78            for vec_index, trait, age, magnitude in self.phenolist:
79                vec_state = vectors[:, vec_index]
80                phenotype_change = vec_state * magnitude
81                phenotype_index = parameterization.traits[trait].start + age
82                phenodiff[:, phenotype_index] += phenotype_change
83            return phenodiff
84
85        else:
86            raise Exception("Neither phenomatrix nor phenolist has been provided.")

vectors .. haploidized genomes of all individuals; shape is (n_individuals, ?) phenomatrix .. phenolist .. list of (bit_index, trait, age, magnitude) zeropheno .. phenotypes of all-zero genomes (i.e. how phenotypes would be if all bits were 0)

def phenodiff_accelerated(self, vectors, zeropheno):
 88    def phenodiff_accelerated(self, vectors, zeropheno):
 89        if self.phenomatrix is not None:
 90            return vectors.dot(self.phenomatrix)
 91
 92        elif self.phenolist is not None:
 93            self._resolve_phenolist()
 94            phenodiff = zeropheno.copy()
 95            phenodiff = apply_phenolist_numba(
 96                phenodiff, vectors, self._vec_indices, self._phenotype_indices, self._magnitudes,
 97            )
 98            return phenodiff
 99
100        else:
101            raise Exception("Neither phenomatrix nor phenolist has been provided.")
@njit(parallel=True)
def apply_phenolist_numba(phenodiff, vectors, vec_indices, phenotype_indices, magnitudes):
112@njit(parallel=True)
113def apply_phenolist_numba(phenodiff, vectors, vec_indices, phenotype_indices, magnitudes):
114    """Apply phenolist effects to phenodiff, parallelized over individuals.
115
116    Instead of pre-gathering vec_states = vectors[:, vec_indices] (which allocates
117    a large temporary array), this kernel indexes directly into vectors.
118    The outer loop is over individuals (prange) so each thread writes to its own
119    row of phenodiff — no race conditions.
120
121    Args:
122        phenodiff: (n_individuals, n_phenotype_cols) output array, modified in place
123        vectors: (n_individuals, genome_size) haploidized genome values
124        vec_indices: (n_phenolist,) genome column index for each phenolist entry
125        phenotype_indices: (n_phenolist,) phenotype column index for each entry
126        magnitudes: (n_phenolist,) effect size for each entry
127    """
128    n_individuals = phenodiff.shape[0]
129    n_phenolist = vec_indices.shape[0]
130    for j in prange(n_individuals):
131        for i in range(n_phenolist):
132            phenodiff[j, phenotype_indices[i]] += vectors[j, vec_indices[i]] * magnitudes[i]
133    return phenodiff

Apply phenolist effects to phenodiff, parallelized over individuals.

Instead of pre-gathering vec_states = vectors[:, vec_indices] (which allocates a large temporary array), this kernel indexes directly into vectors. The outer loop is over individuals (prange) so each thread writes to its own row of phenodiff — no race conditions.

Args: phenodiff: (n_individuals, n_phenotype_cols) output array, modified in place vectors: (n_individuals, genome_size) haploidized genome values vec_indices: (n_phenolist,) genome column index for each phenolist entry phenotype_indices: (n_phenolist,) phenotype column index for each entry magnitudes: (n_phenolist,) effect size for each entry