aegis_sim.submodels.genetics.modifying.gpm

  1import logging
  2import numpy as np
  3from aegis_sim import parameterization
  4from numba import njit
  5
  6
  7class GPM:
  8    """Genotype-phenotype map
  9
 10    Order of elements in the vector does not matter. # TODO Explain better
 11
 12    ### GENOTYPE-PHENOTYPE MAP (GPM) ###
 13    In AEGIS, every individual carries a genome which encodes an intrinsic phenotype.
 14    A genome can be converted into an intrinsic phenotype using the genotype-phenotype map (GPM).
 15    Conceptually, the GPM contains the information on how each site affects the intrinsic phenotype
 16    of the individual (e.g. the first site decreases fertility by 0.15% at age class 28).
 17
 18    The GPM can be saved in two formats: a list or a matrix.
 19
 20    If it is a list, it will be a list of quadruple (4-tuple) with the following structure: `index`, `trait`, `age`, `magnitude`.
 21    Thus a single quadruple encodes an effect of a single site at the index `index` (e.g. 1)
 22    on the trait `trait` (e.g. fertility) expressed at the age `age` (e.g. 28). The change to the trait is of magnitude `magnitude` (0.85).
 23    When a site is pleiotropic, there will be multiple quadruples with the same `index`.
 24    We distringuish between age-pleiotropy (a single site affecting at least one trait at multiple ages) and trait-pleiotropy (a single site affecting multiple traits).
 25
 26    If the GPM is encoded as a matrix, it is a 3D matrix where dimensions encode `index`, `trait` and `age`,
 27    while the matrix values encode the `magnitude`s.
 28
 29    When most sites are age-pleiotropic and trait-pleiotropic, the optimal encoding format is a matrix.
 30    When most sites have age-specific and trait-specific effects, the optimal encoding format is a list
 31    rather than a matrix because the matrix will be very sparse (it will carry a lot of 0's).
 32    """
 33
 34    def __init__(self, phenomatrix, phenolist):
 35        self.phenomatrix = phenomatrix
 36        self.phenolist = phenolist
 37
 38        self.dummy = len(self.phenolist) == 0 and self.phenomatrix is None
 39        if self.dummy:
 40            logging.info("Phenomap inactive.")
 41
 42    def phenodiff(self, vectors, zeropheno):
 43        """
 44        vectors .. haploidized genomes of all individuals; shape is (n_individuals, ?)
 45        phenomatrix ..
 46        phenolist .. list of (bit_index, trait, age, magnitude)
 47        zeropheno .. phenotypes of all-zero genomes (i.e. how phenotypes would be if all bits were 0)
 48        """
 49
 50        if self.phenomatrix is not None:
 51            # TODO BUG resolve phenomatrix
 52            return vectors.dot(self.phenomatrix)
 53
 54        elif self.phenolist is not None:
 55            phenodiff = zeropheno.copy()
 56            for vec_index, trait, age, magnitude in self.phenolist:
 57                vec_state = vectors[:, vec_index]
 58                phenotype_change = vec_state * magnitude
 59                phenotype_index = parameterization.traits[trait].start + age
 60                phenodiff[:, phenotype_index] += phenotype_change
 61            return phenodiff
 62
 63        else:
 64            raise Exception("Neither phenomatrix nor phenolist has been provided.")
 65
 66    def phenodiff_accelerated(self, vectors, zeropheno):
 67        if self.phenomatrix is not None:
 68            return vectors.dot(self.phenomatrix)
 69
 70        elif self.phenolist is not None:
 71            phenodiff = zeropheno.copy()
 72            vec_indices, traits, ages, magnitudes = zip(*self.phenolist)
 73            vec_indices = np.array(vec_indices)
 74            magnitudes = np.array(magnitudes)
 75            vec_states = vectors[:, vec_indices]
 76            phenotype_indices = np.array(
 77                [parameterization.traits[trait].start + age for trait, age in zip(traits, ages)]
 78            )
 79            phenodiff = apply_phenolist_numba(phenodiff, vec_states, phenotype_indices, magnitudes)
 80            return phenodiff
 81
 82        else:
 83            raise Exception("Neither phenomatrix nor phenolist has been provided.")
 84
 85    def __call__(self, interpretome, zeropheno):
 86        if self.dummy:
 87            return zeropheno
 88        else:
 89            # phenodiff_old = self.phenodiff(vectors=interpretome, zeropheno=zeropheno)
 90            phenodiff = self.phenodiff_accelerated(vectors=interpretome, zeropheno=zeropheno)
 91            return phenodiff
 92
 93
 94@njit
 95def apply_phenolist_numba(phenodiff, vec_states, phenotype_indices, magnitudes):
 96    """
 97    phenodiff .. modification to phenotype vector for a specific trait; shape is (individual, phenotypic site)
 98    vec_states .. state of each genomic site for each individual; shape is (individual, genomic site)
 99    phenotype_indices .. position in the phenotype vector that is modified by the genomic site; shape is (genomic site)
100    magnitudes .. effect size for each site; shape is (genomic site)
101    """
102    n_individuals, n_phenos = vec_states.shape
103    for i in range(n_phenos):
104        for j in range(n_individuals):
105            phenodiff[j, phenotype_indices[i]] += vec_states[j, i] * magnitudes[i]
106    return phenodiff
class GPM:
 8class GPM:
 9    """Genotype-phenotype map
10
11    Order of elements in the vector does not matter. # TODO Explain better
12
13    ### GENOTYPE-PHENOTYPE MAP (GPM) ###
14    In AEGIS, every individual carries a genome which encodes an intrinsic phenotype.
15    A genome can be converted into an intrinsic phenotype using the genotype-phenotype map (GPM).
16    Conceptually, the GPM contains the information on how each site affects the intrinsic phenotype
17    of the individual (e.g. the first site decreases fertility by 0.15% at age class 28).
18
19    The GPM can be saved in two formats: a list or a matrix.
20
21    If it is a list, it will be a list of quadruple (4-tuple) with the following structure: `index`, `trait`, `age`, `magnitude`.
22    Thus a single quadruple encodes an effect of a single site at the index `index` (e.g. 1)
23    on the trait `trait` (e.g. fertility) expressed at the age `age` (e.g. 28). The change to the trait is of magnitude `magnitude` (0.85).
24    When a site is pleiotropic, there will be multiple quadruples with the same `index`.
25    We distringuish between age-pleiotropy (a single site affecting at least one trait at multiple ages) and trait-pleiotropy (a single site affecting multiple traits).
26
27    If the GPM is encoded as a matrix, it is a 3D matrix where dimensions encode `index`, `trait` and `age`,
28    while the matrix values encode the `magnitude`s.
29
30    When most sites are age-pleiotropic and trait-pleiotropic, the optimal encoding format is a matrix.
31    When most sites have age-specific and trait-specific effects, the optimal encoding format is a list
32    rather than a matrix because the matrix will be very sparse (it will carry a lot of 0's).
33    """
34
35    def __init__(self, phenomatrix, phenolist):
36        self.phenomatrix = phenomatrix
37        self.phenolist = phenolist
38
39        self.dummy = len(self.phenolist) == 0 and self.phenomatrix is None
40        if self.dummy:
41            logging.info("Phenomap inactive.")
42
43    def phenodiff(self, vectors, zeropheno):
44        """
45        vectors .. haploidized genomes of all individuals; shape is (n_individuals, ?)
46        phenomatrix ..
47        phenolist .. list of (bit_index, trait, age, magnitude)
48        zeropheno .. phenotypes of all-zero genomes (i.e. how phenotypes would be if all bits were 0)
49        """
50
51        if self.phenomatrix is not None:
52            # TODO BUG resolve phenomatrix
53            return vectors.dot(self.phenomatrix)
54
55        elif self.phenolist is not None:
56            phenodiff = zeropheno.copy()
57            for vec_index, trait, age, magnitude in self.phenolist:
58                vec_state = vectors[:, vec_index]
59                phenotype_change = vec_state * magnitude
60                phenotype_index = parameterization.traits[trait].start + age
61                phenodiff[:, phenotype_index] += phenotype_change
62            return phenodiff
63
64        else:
65            raise Exception("Neither phenomatrix nor phenolist has been provided.")
66
67    def phenodiff_accelerated(self, vectors, zeropheno):
68        if self.phenomatrix is not None:
69            return vectors.dot(self.phenomatrix)
70
71        elif self.phenolist is not None:
72            phenodiff = zeropheno.copy()
73            vec_indices, traits, ages, magnitudes = zip(*self.phenolist)
74            vec_indices = np.array(vec_indices)
75            magnitudes = np.array(magnitudes)
76            vec_states = vectors[:, vec_indices]
77            phenotype_indices = np.array(
78                [parameterization.traits[trait].start + age for trait, age in zip(traits, ages)]
79            )
80            phenodiff = apply_phenolist_numba(phenodiff, vec_states, phenotype_indices, magnitudes)
81            return phenodiff
82
83        else:
84            raise Exception("Neither phenomatrix nor phenolist has been provided.")
85
86    def __call__(self, interpretome, zeropheno):
87        if self.dummy:
88            return zeropheno
89        else:
90            # phenodiff_old = self.phenodiff(vectors=interpretome, zeropheno=zeropheno)
91            phenodiff = self.phenodiff_accelerated(vectors=interpretome, zeropheno=zeropheno)
92            return phenodiff

Genotype-phenotype map

Order of elements in the vector does not matter. # TODO Explain better

GENOTYPE-PHENOTYPE MAP (GPM)

In AEGIS, every individual carries a genome which encodes an intrinsic phenotype. A genome can be converted into an intrinsic phenotype using the genotype-phenotype map (GPM). Conceptually, the GPM contains the information on how each site affects the intrinsic phenotype of the individual (e.g. the first site decreases fertility by 0.15% at age class 28).

The GPM can be saved in two formats: a list or a matrix.

If it is a list, it will be a list of quadruples (4-tuples) with the following structure: `index`, `trait`, `age`, `magnitude`. Thus a single quadruple encodes an effect of a single site at the index `index` (e.g. 1) on the trait `trait` (e.g. fertility) expressed at the age `age` (e.g. 28). The change to the trait is of magnitude `magnitude` (e.g. 0.85). When a site is pleiotropic, there will be multiple quadruples with the same `index`. We distinguish between age-pleiotropy (a single site affecting at least one trait at multiple ages) and trait-pleiotropy (a single site affecting multiple traits).

If the GPM is encoded as a matrix, it is a 3D matrix where dimensions encode index, trait and age, while the matrix values encode the magnitudes.

When most sites are age-pleiotropic and trait-pleiotropic, the optimal encoding format is a matrix. When most sites have age-specific and trait-specific effects, the optimal encoding format is a list rather than a matrix because the matrix will be very sparse (it will carry a lot of 0's).

GPM(phenomatrix, phenolist)
35    def __init__(self, phenomatrix, phenolist):
36        self.phenomatrix = phenomatrix
37        self.phenolist = phenolist
38
39        self.dummy = len(self.phenolist) == 0 and self.phenomatrix is None
40        if self.dummy:
41            logging.info("Phenomap inactive.")
phenomatrix
phenolist
dummy
def phenodiff(self, vectors, zeropheno):
43    def phenodiff(self, vectors, zeropheno):
44        """
45        vectors .. haploidized genomes of all individuals; shape is (n_individuals, ?)
46        phenomatrix ..
47        phenolist .. list of (bit_index, trait, age, magnitude)
48        zeropheno .. phenotypes of all-zero genomes (i.e. how phenotypes would be if all bits were 0)
49        """
50
51        if self.phenomatrix is not None:
52            # TODO BUG resolve phenomatrix
53            return vectors.dot(self.phenomatrix)
54
55        elif self.phenolist is not None:
56            phenodiff = zeropheno.copy()
57            for vec_index, trait, age, magnitude in self.phenolist:
58                vec_state = vectors[:, vec_index]
59                phenotype_change = vec_state * magnitude
60                phenotype_index = parameterization.traits[trait].start + age
61                phenodiff[:, phenotype_index] += phenotype_change
62            return phenodiff
63
64        else:
65            raise Exception("Neither phenomatrix nor phenolist has been provided.")

vectors .. haploidized genomes of all individuals; shape is (n_individuals, ?)
phenomatrix ..
phenolist .. list of (bit_index, trait, age, magnitude)
zeropheno .. phenotypes of all-zero genomes (i.e. how phenotypes would be if all bits were 0)

def phenodiff_accelerated(self, vectors, zeropheno):
67    def phenodiff_accelerated(self, vectors, zeropheno):
68        if self.phenomatrix is not None:
69            return vectors.dot(self.phenomatrix)
70
71        elif self.phenolist is not None:
72            phenodiff = zeropheno.copy()
73            vec_indices, traits, ages, magnitudes = zip(*self.phenolist)
74            vec_indices = np.array(vec_indices)
75            magnitudes = np.array(magnitudes)
76            vec_states = vectors[:, vec_indices]
77            phenotype_indices = np.array(
78                [parameterization.traits[trait].start + age for trait, age in zip(traits, ages)]
79            )
80            phenodiff = apply_phenolist_numba(phenodiff, vec_states, phenotype_indices, magnitudes)
81            return phenodiff
82
83        else:
84            raise Exception("Neither phenomatrix nor phenolist has been provided.")
@njit
def apply_phenolist_numba(phenodiff, vec_states, phenotype_indices, magnitudes):
 95@njit
 96def apply_phenolist_numba(phenodiff, vec_states, phenotype_indices, magnitudes):
 97    """
 98    phenodiff .. modification to phenotype vector for a specific trait; shape is (individual, phenotypic site)
 99    vec_states .. state of each genomic site for each individual; shape is (individual, genomic site)
100    phenotype_indices .. position in the phenotype vector that is modified by the genomic site; shape is (genomic site)
101    magnitudes .. effect size for each site; shape is (genomic site)
102    """
103    n_individuals, n_phenos = vec_states.shape
104    for i in range(n_phenos):
105        for j in range(n_individuals):
106            phenodiff[j, phenotype_indices[i]] += vec_states[j, i] * magnitudes[i]
107    return phenodiff

phenodiff .. modification to phenotype vector for a specific trait; shape is (individual, phenotypic site)
vec_states .. state of each genomic site for each individual; shape is (individual, genomic site)
phenotype_indices .. position in the phenotype vector that is modified by the genomic site; shape is (genomic site)
magnitudes .. effect size for each site; shape is (genomic site)