# aegis_sim.utilities.container

  1import pandas as pd
  2import pathlib
  3import logging
  4import json
  5import yaml
  6from typing import Union
  7import numpy as np
  8import psutil
  9
 10from aegis_sim.dataclasses.population import Population
 11from aegis_sim.constants import VALID_CAUSES_OF_DEATH
 12from aegis_sim.recording.ticker import Ticker
 13
 14
 15# TODO for analysis:
 16# TODO add 0 to survivorship, think about edge cases
 17# TODO clean up indices, columns and dtypes
 18# TODO be explicit about aggregation function
 19
 20
 21class Container:
 22    """
 23    Reads and reformats output files so they are available for internal and external use (prepare for export).
 24    """
 25
 26    def __init__(self, basepath):
 27        self.basepath = pathlib.Path(
 28            basepath
 29        ).absolute()  # If path to config file is /path/_.yml, then basepath is /path/_
 30        self.name = self.basepath.stem
 31        self.data = {}
 32        # self.set_paths()
 33        self.paths = None
 34        self.ticker = None
 35
 36    def set_paths(self):
 37        # TODO smarter way of listing paths; you are capturing te files with number keys e.g. '6': ... /te/6.csv; that's silly
 38        # TODO these are repeated elsewhere, e.g. path for ticker
 39        self.paths = {
 40            path.stem: path for path in self.basepath.glob("**/*") if path.is_file() and path.suffix == ".csv"
 41        }
 42        self.paths["log"] = self.basepath / "progress.log"
 43        self.paths["ticker"] = self.basepath / "ticker.txt"
 44        self.paths["simpleprogress"] = self.basepath / "simpleprogress.log"
 45        self.paths["output_summary"] = self.basepath / "output_summary.json"
 46        self.paths["input_summary"] = self.basepath / "input_summary.json"
 47        self.paths["envdriftmap"] = self.basepath / "envdriftmap.csv"
 48        self.paths["snapshots"] = {}
 49        for kind in ("demography", "phenotypes", "genotypes"):
 50            self.paths["snapshots"][kind] = sorted(
 51                (self.basepath / "snapshots" / kind).glob("*"),
 52                key=lambda path: int(path.stem),
 53            )
 54        self.paths["pickles"] = sorted(
 55            (self.basepath / "pickles").glob("*"),
 56            key=lambda path: int(path.stem),
 57        )
 58        # self.paths["te"] = sorted(
 59        #     (self.basepath / "te").glob("*"),
 60        #     key=lambda path: int(path.stem),
 61        # )
 62        self.paths["popsize_before_reproduction"] = self.basepath / "popsize_before_reproduction.csv"
 63        self.paths["popsize_after_reproduction"] = self.basepath / "popsize_after_reproduction.csv"
 64        self.paths["eggnum_after_reproduction"] = self.basepath / "eggnum_after_reproduction.csv"
 65
 66        if not self.paths["log"].is_file():
 67            logging.error(f"No AEGIS log found at path {self.paths['log']}.")
 68
 69    def get_paths(self):
 70        if self.paths is None:
 71            self.set_paths()
 72        return self.paths
 73
 74    def get_path(self, name):
 75        if self.paths is None:
 76            self.set_paths()
 77        return self.paths[name]
 78
 79    def get_record_structure(self):
 80        # TODO
 81        return
 82
 83    def report(self):
 84        """Report present and missing files"""
 85        # TODO
 86        return
 87
 88    def export(self):
 89        """Export all primary data from the container using general formats"""
 90        # TODO
 91        return
 92
 93    @staticmethod
 94    def stop_process(pid, kind_of_process):
 95        try:
 96            logging.info(f"Terminating {kind_of_process} process with PID {pid}...")
 97            process = psutil.Process(pid)
 98            process.terminate()  # or process.kill()
 99            process.wait()  # Optional: Wait for the process to be fully terminated
100            logging.info(f"Process with PID {pid} terminated successfully.")
101        except psutil.NoSuchProcess:
102            logging.warning(f"No process found with PID {pid}.")
103        except psutil.AccessDenied:
104            logging.warning(f"Access denied when trying to terminate the process with PID {pid}.")
105        except Exception as e:
106            logging.error(f"An error occurred: {e}")
107
108    def terminate(self):
109        pid = self.get_input_summary()["pid"]
110        assert pid is not None
111        self.stop_process(pid, "simulation")
112        tpid = self.get_input_summary()["ticker_pid"]
113        assert tpid is not None
114        self.stop_process(tpid, "ticker")
115
116    ############
117    # METADATA #
118    ############
119
120    def get_log(self, reload=True):
121        if ("log" not in self.data) or reload:
122            df = pd.read_csv(self.get_path("log"), sep="|")
123            df.columns = [x.strip() for x in df.columns]
124
125            def dhm_inverse(dhm):
126                nums = dhm.replace("`", ":").split(":")
127                return int(nums[0]) * 24 * 60 + int(nums[1]) * 60 + int(nums[2])
128
129            # TODO resolve deprecated function
130            try:
131                df[["ETA", "t1M", "runtime"]].map(dhm_inverse)
132            except:
133                df[["ETA", "t1M", "runtime"]].applymap(dhm_inverse)
134            self.data["log"] = df
135        return self.data["log"]
136
137    def get_simple_log(self):
138        try:
139            with open(self.get_path("simpleprogress"), "r") as file_:
140                text = file_.read()
141                step, steps_per_simulation = text.split("/")
142                return int(step), int(steps_per_simulation)
143        except:
144            logging.error(f"No simpleprogress.log found at {self.get_path('simpleprogress')}")
145
146    def get_ticker(self):
147        if self.ticker is None:
148            TICKER_RATE = self.get_final_config()["TICKER_RATE"]
149            self.ticker = Ticker(TICKER_RATE=TICKER_RATE, odir=self.get_path("ticker").parent)
150        return self.ticker
151
152    def get_config(self):
153        if "config" not in self.data:
154            path = self.basepath.parent / f"{self.basepath}.yml"
155            with open(path, "r") as file_:
156                custom_config = yaml.safe_load(file_)
157            # default_config = get_default_parameters()
158            if custom_config is None:
159                custom_config = {}
160            # self.data["config"] = {**default_config, **custom_config}
161            self.data["config"] = custom_config
162        return self.data["config"]
163
164    def get_final_config(self):
165        if "final_config" not in self.data:
166            path = self.basepath / "final_config.yml"
167            with open(path, "r") as file_:
168                final_config = yaml.safe_load(file_)
169            if final_config is None:
170                final_config = {}
171            self.data["final_config"] = final_config
172        return self.data["final_config"]
173
174    def get_generations_until_interval(self):
175        """Return Series of number of generations simulated up until interval i"""
176        # TODO beware that snapshots are not timed linearly; there is a bunch of snapshots at the end of the simulation
177        aar = self.get_average_age_at_reproduction()
178        aar.iloc[0] = np.inf  # No time has passed, so no generations yet
179        IR = self.get_final_config()["INTERVAL_RATE"]
180        aar = aar.pipe(lambda s: IR / s).cumsum()
181        return aar
182
183    def get_output_summary(self) -> Union[dict, None]:
184        path = self.get_path("output_summary")
185        if path.exists():
186            return self._read_json(path)
187        return {}
188
189    def get_input_summary(self):
190        return self._read_json(self.get_path("input_summary"))
191
192    def get_envidriftmap(self):
193        path = self.get_path("envdriftmap")
194        if path.exists():
195            return pd.read_csv(self.get_path("envdriftmap"), header=None)
196        return None
197
198    def get_phenomap(self):
199        path = self.get_path("phenomap")
200        if path.exists():
201            return pd.read_csv(self.get_path("phenomap"))
202        return None
203
204    ##########
205    # TABLES #
206    ##########
207
208    def get_birth_table_observed_interval(self, normalize=False):
209        """
210        Observed data.
211        Number of births (int) per parental age during an interval of length INTERVAL_RATE.
212        columns.name == parental_age (int)
213        index.name == interval (int)
214        """
215        table = self._read_df("age_at_birth")
216        if normalize:
217            table = table.div(table.sum(1), axis=0)
218        table.index.names = ["interval"]
219        table.columns.names = ["parental_age"]
220        table.columns = table.columns.astype(int)
221        return table
222
223    def get_life_table_observed_interval(self, normalize=False):
224        """
225        Observed data.
226        Number of individuals (int) per age class observed during an interval of length INTERVAL_RATE.
227        columns.name == age_class (int)
228        index.name == interval (int)
229        """
230        table = self._read_df("additive_age_structure")
231        table.index.names = ["interval"]
232        table.columns.names = ["age_class"]
233        table.columns = table.columns.astype(int)
234        # NOTE normalize by sum
235        if normalize:
236            table = table.div(table.sum(1), axis=0)
237        return table
238
239    def get_life_table_observed_snapshot(self, record_index: int, normalize=False):
240        """
241        Observed data. Series.
242        Number of individuals (int) per age class observed at some simulation step captured by the record of index record_index.
243        name == count
244        index.name == age_class
245        """
246        AGE_LIMIT = self.get_final_config()["AGE_LIMIT"]
247        table = (
248            self.get_demography_observed_snapshot(record_index)
249            .ages.value_counts()
250            .reindex(range(AGE_LIMIT), fill_value=0)
251        )
252        table.index.names = ["age_class"]
253        return table
254
255    def get_death_table_observed_interval(self, normalize=False):
256        """
257        Observed data. Has a MultiIndex.
258        Number of deaths (int) per age class observed during an interval of length INTERVAL_RATE.
259        columns.name == age_class (int)
260        index.names == ["interval", "cause_of_death"] (int, str)
261        """
262        # TODO think about position of axes
263        table = (
264            pd.concat({causeofdeath: self._read_df(f"age_at_{causeofdeath}") for causeofdeath in VALID_CAUSES_OF_DEATH})
265            .swaplevel()
266            .sort_index(level=0)
267        )
268        table.index.names = ["interval", "cause_of_death"]
269        table.columns.names = ["age_class"]
270        table.columns = table.columns.astype(int)
271        return table
272
273    #######################
274    # TABLES : derivative #
275    #######################
276
277    def get_surv_observed_interval(self):
278        # TODO this is not accurate; this assumes that the population is in an equilibrium, or it only works if the life table is sampling across a long period
279        lt = self.get_life_table_observed_interval()
280        lt = lt.pct_change(axis=1).shift(-1, axis=1).add(1).replace(np.inf, 1)
281        return lt
282
283    def get_fert_observed_interval(self):
284        lt = self.get_life_table_observed_interval()
285        bt = self.get_birth_table_observed_interval()
286        return bt / lt
287
288    ##########
289    # BASICS #
290    ##########
291
292    # TODO add better column and index names
293
294    def get_genotypes_intrinsic_snapshot(self, record_index):
295        """
296        columns .. bit index
297        index .. individual index
298        value .. True or False
299        """
300        # TODO let index denote the step at which the snapshot was taken
301        return self._read_snapshot("genotypes", record_index=record_index)
302
303    def get_phenotype_intrinsic_snapshot(self, trait, record_index):
304        """
305        columns .. phenotypic trait index
306        index .. individual index
307        value .. phenotypic trait value
308        """
309        # TODO organize by trait
310        # TODO let index denote the step at which the snapshot was taken
311        df = self._read_snapshot("phenotypes", record_index=record_index)
312        # df.columns = df.columns.str.split("_")
313        return df
314
315    def get_demography_observed_snapshot(self, record_index):
316        """
317        columns .. ages, births, birthdays, generations, sizes, sexes
318        index .. individual index
319        """
320        # TODO let index denote the step at which the snapshot was taken
321        return self._read_snapshot("demography", record_index=record_index)
322
323    def get_genotypes_intrinsic_interval(self, reload=True):
324        """
325        columns .. bit index
326        index .. record index
327        value .. mean bit value
328        """
329        # TODO check that they exist
330        df = pd.read_csv(self.get_path("genotypes"), header=[0, 1], index_col=None)
331        df.index = df.index.astype(int)
332        df.columns = df.columns.set_levels([df.columns.levels[0].astype(int), df.columns.levels[1].astype(int)])
333        df.index.names = ["interval"]
334        df.columns.names = ["bit_index", "ploidy"]
335        return df
336
337    def get_phenotype_intrinsic_interval(self, trait, reload=True):
338        """
339        columns .. age
340        index .. record index
341        value .. median phenotypic trait value
342        """
343        # TODO check that they exist
344        df = pd.read_csv(self.get_path("phenotypes"), header=[0, 1])
345        df.index.names = ["interval"]
346        df.index = df.index.astype(int)
347        df.columns.names = ["trait", "age_class"]
348        # TODO age_class is str
349        return df.xs(trait, axis=1)
350
351    def get_survival_analysis_TE_observed_interval(self, record_index):
352        """
353        columns .. T, E
354        index .. individual
355        value .. age at event, event (1 .. died, 0 .. alive)
356        """
357        # TODO error with T and E in the record; they are being appended on top
358        assert record_index < len(self.get_path("te")), "Index out of range"
359        data = pd.read_csv(self.get_path("te")[record_index], header=0)
360        data.index.names = ["individual"]
361        return data
362
363    def get_population_size_before_reproduction(self):
364        data = pd.read_csv(self.get_path("popsize_before_reproduction"), header=None)
365        data.index.names = ["steps"]
366        data.columns = ["popsize"]
367        return data
368
369    def get_population_size_after_reproduction(self):
370        data = pd.read_csv(self.get_path("popsize_after_reproduction"), header=None)
371        data.index.names = ["steps"]
372        data.columns = ["popsize"]
373        return data
374
375    def get_egg_number_after_reproduction(self):
376        data = pd.read_csv(self.get_path("eggnum_after_reproduction"), header=None)
377        data.index.names = ["steps"]
378        data.columns = ["number"]
379        return data
380
381    def get_resource_amount_before_scavenging(self):
382        data = pd.read_csv(self.get_path("resources_before_scavenging"), header=None)
383        data.index.names = ["steps"]
384        data.columns = ["resources"]
385        return data
386
387    def get_resource_amount_after_scavenging(self):
388        data = pd.read_csv(self.get_path("resources_after_scavenging"), header=None)
389        data.index.names = ["steps"]
390        data.columns = ["resources"]
391        return data
392
393    ###############
394    # DERIVATIVES #
395    ###############
396
397    def get_lifetime_reproduction(self):
398        survivorship = self.get_surv_observed_interval().cumprod(1)
399        fertility = self.get_fert_observed_interval()
400        return (survivorship * fertility).sum(axis=1)
401
402    def get_average_age_at_reproduction(self):
403        bt = self.get_birth_table_observed_interval()
404        n_offspring = bt.sum(1)
405        average_age_at_reproduction = (bt * bt.columns).sum(1) / n_offspring
406        return average_age_at_reproduction
407
408    #############
409    # UTILITIES #
410    #############
411
412    def _file_exists(self, stem):
413        if self.paths is None:
414            self.set_paths()
415        return stem in self.paths
416
417    def has_ticker_stopped(self):
418        return self.get_ticker().has_stopped()
419
420    def _read_df(self, stem, reload=True):
421        file_read = stem in self.data
422        # TODO Read also files that are not .csv
423
424        if not self._file_exists(stem):
425            logging.error(f"File {self.get_path(stem)} des not exist.")
426        elif (not file_read) or reload:
427            self.data[stem] = pd.read_csv(self.get_path(stem), header=0)
428
429        return self.data.get(stem, pd.DataFrame())
430
431    @staticmethod
432    def _read_json(path):
433        if not path.exists():
434            logging.warning(f"'{path}' does not exist.")
435            return None
436        with open(path, "r") as file_:
437            return json.load(file_)
438
439    def _read_snapshot(self, record_type, record_index):
440        assert record_type in self.get_path("snapshots"), f"No records of '{record_type}' can be found in snapshots"
441        assert record_index < len(self.get_path("snapshots")[record_type]), "Index out of range"
442        return pd.read_feather(self.get_path("snapshots")[record_type][record_index])
443
444    def _read_pickle(self, record_index):
445        assert record_index < len(self.get_path("pickles")), "Index out of range"
446        return Population.load_pickle_from(self.get_path("pickles")[record_index])
class Container:
 22class Container:
 23    """
 24    Reads and reformats output files so they are available for internal and external use (prepare for export).
 25    """
 26
 27    def __init__(self, basepath):
 28        self.basepath = pathlib.Path(
 29            basepath
 30        ).absolute()  # If path to config file is /path/_.yml, then basepath is /path/_
 31        self.name = self.basepath.stem
 32        self.data = {}
 33        # self.set_paths()
 34        self.paths = None
 35        self.ticker = None
 36
 37    def set_paths(self):
 38        # TODO smarter way of listing paths; you are capturing te files with number keys e.g. '6': ... /te/6.csv; that's silly
 39        # TODO these are repeated elsewhere, e.g. path for ticker
 40        self.paths = {
 41            path.stem: path for path in self.basepath.glob("**/*") if path.is_file() and path.suffix == ".csv"
 42        }
 43        self.paths["log"] = self.basepath / "progress.log"
 44        self.paths["ticker"] = self.basepath / "ticker.txt"
 45        self.paths["simpleprogress"] = self.basepath / "simpleprogress.log"
 46        self.paths["output_summary"] = self.basepath / "output_summary.json"
 47        self.paths["input_summary"] = self.basepath / "input_summary.json"
 48        self.paths["envdriftmap"] = self.basepath / "envdriftmap.csv"
 49        self.paths["snapshots"] = {}
 50        for kind in ("demography", "phenotypes", "genotypes"):
 51            self.paths["snapshots"][kind] = sorted(
 52                (self.basepath / "snapshots" / kind).glob("*"),
 53                key=lambda path: int(path.stem),
 54            )
 55        self.paths["pickles"] = sorted(
 56            (self.basepath / "pickles").glob("*"),
 57            key=lambda path: int(path.stem),
 58        )
 59        # self.paths["te"] = sorted(
 60        #     (self.basepath / "te").glob("*"),
 61        #     key=lambda path: int(path.stem),
 62        # )
 63        self.paths["popsize_before_reproduction"] = self.basepath / "popsize_before_reproduction.csv"
 64        self.paths["popsize_after_reproduction"] = self.basepath / "popsize_after_reproduction.csv"
 65        self.paths["eggnum_after_reproduction"] = self.basepath / "eggnum_after_reproduction.csv"
 66
 67        if not self.paths["log"].is_file():
 68            logging.error(f"No AEGIS log found at path {self.paths['log']}.")
 69
 70    def get_paths(self):
 71        if self.paths is None:
 72            self.set_paths()
 73        return self.paths
 74
 75    def get_path(self, name):
 76        if self.paths is None:
 77            self.set_paths()
 78        return self.paths[name]
 79
 80    def get_record_structure(self):
 81        # TODO
 82        return
 83
 84    def report(self):
 85        """Report present and missing files"""
 86        # TODO
 87        return
 88
 89    def export(self):
 90        """Export all primary data from the container using general formats"""
 91        # TODO
 92        return
 93
 94    @staticmethod
 95    def stop_process(pid, kind_of_process):
 96        try:
 97            logging.info(f"Terminating {kind_of_process} process with PID {pid}...")
 98            process = psutil.Process(pid)
 99            process.terminate()  # or process.kill()
100            process.wait()  # Optional: Wait for the process to be fully terminated
101            logging.info(f"Process with PID {pid} terminated successfully.")
102        except psutil.NoSuchProcess:
103            logging.warning(f"No process found with PID {pid}.")
104        except psutil.AccessDenied:
105            logging.warning(f"Access denied when trying to terminate the process with PID {pid}.")
106        except Exception as e:
107            logging.error(f"An error occurred: {e}")
108
109    def terminate(self):
110        pid = self.get_input_summary()["pid"]
111        assert pid is not None
112        self.stop_process(pid, "simulation")
113        tpid = self.get_input_summary()["ticker_pid"]
114        assert tpid is not None
115        self.stop_process(tpid, "ticker")
116
117    ############
118    # METADATA #
119    ############
120
121    def get_log(self, reload=True):
122        if ("log" not in self.data) or reload:
123            df = pd.read_csv(self.get_path("log"), sep="|")
124            df.columns = [x.strip() for x in df.columns]
125
126            def dhm_inverse(dhm):
127                nums = dhm.replace("`", ":").split(":")
128                return int(nums[0]) * 24 * 60 + int(nums[1]) * 60 + int(nums[2])
129
130            # TODO resolve deprecated function
131            try:
132                df[["ETA", "t1M", "runtime"]].map(dhm_inverse)
133            except:
134                df[["ETA", "t1M", "runtime"]].applymap(dhm_inverse)
135            self.data["log"] = df
136        return self.data["log"]
137
138    def get_simple_log(self):
139        try:
140            with open(self.get_path("simpleprogress"), "r") as file_:
141                text = file_.read()
142                step, steps_per_simulation = text.split("/")
143                return int(step), int(steps_per_simulation)
144        except:
145            logging.error(f"No simpleprogress.log found at {self.get_path('simpleprogress')}")
146
147    def get_ticker(self):
148        if self.ticker is None:
149            TICKER_RATE = self.get_final_config()["TICKER_RATE"]
150            self.ticker = Ticker(TICKER_RATE=TICKER_RATE, odir=self.get_path("ticker").parent)
151        return self.ticker
152
153    def get_config(self):
154        if "config" not in self.data:
155            path = self.basepath.parent / f"{self.basepath}.yml"
156            with open(path, "r") as file_:
157                custom_config = yaml.safe_load(file_)
158            # default_config = get_default_parameters()
159            if custom_config is None:
160                custom_config = {}
161            # self.data["config"] = {**default_config, **custom_config}
162            self.data["config"] = custom_config
163        return self.data["config"]
164
165    def get_final_config(self):
166        if "final_config" not in self.data:
167            path = self.basepath / "final_config.yml"
168            with open(path, "r") as file_:
169                final_config = yaml.safe_load(file_)
170            if final_config is None:
171                final_config = {}
172            self.data["final_config"] = final_config
173        return self.data["final_config"]
174
175    def get_generations_until_interval(self):
176        """Return Series of number of generations simulated up until interval i"""
177        # TODO beware that snapshots are not timed linearly; there is a bunch of snapshots at the end of the simulation
178        aar = self.get_average_age_at_reproduction()
179        aar.iloc[0] = np.inf  # No time has passed, so no generations yet
180        IR = self.get_final_config()["INTERVAL_RATE"]
181        aar = aar.pipe(lambda s: IR / s).cumsum()
182        return aar
183
184    def get_output_summary(self) -> Union[dict, None]:
185        path = self.get_path("output_summary")
186        if path.exists():
187            return self._read_json(path)
188        return {}
189
190    def get_input_summary(self):
191        return self._read_json(self.get_path("input_summary"))
192
193    def get_envidriftmap(self):
194        path = self.get_path("envdriftmap")
195        if path.exists():
196            return pd.read_csv(self.get_path("envdriftmap"), header=None)
197        return None
198
199    def get_phenomap(self):
200        path = self.get_path("phenomap")
201        if path.exists():
202            return pd.read_csv(self.get_path("phenomap"))
203        return None
204
205    ##########
206    # TABLES #
207    ##########
208
209    def get_birth_table_observed_interval(self, normalize=False):
210        """
211        Observed data.
212        Number of births (int) per parental age during an interval of length INTERVAL_RATE.
213        columns.name == parental_age (int)
214        index.name == interval (int)
215        """
216        table = self._read_df("age_at_birth")
217        if normalize:
218            table = table.div(table.sum(1), axis=0)
219        table.index.names = ["interval"]
220        table.columns.names = ["parental_age"]
221        table.columns = table.columns.astype(int)
222        return table
223
224    def get_life_table_observed_interval(self, normalize=False):
225        """
226        Observed data.
227        Number of individuals (int) per age class observed during an interval of length INTERVAL_RATE.
228        columns.name == age_class (int)
229        index.name == interval (int)
230        """
231        table = self._read_df("additive_age_structure")
232        table.index.names = ["interval"]
233        table.columns.names = ["age_class"]
234        table.columns = table.columns.astype(int)
235        # NOTE normalize by sum
236        if normalize:
237            table = table.div(table.sum(1), axis=0)
238        return table
239
240    def get_life_table_observed_snapshot(self, record_index: int, normalize=False):
241        """
242        Observed data. Series.
243        Number of individuals (int) per age class observed at some simulation step captured by the record of index record_index.
244        name == count
245        index.name == age_class
246        """
247        AGE_LIMIT = self.get_final_config()["AGE_LIMIT"]
248        table = (
249            self.get_demography_observed_snapshot(record_index)
250            .ages.value_counts()
251            .reindex(range(AGE_LIMIT), fill_value=0)
252        )
253        table.index.names = ["age_class"]
254        return table
255
256    def get_death_table_observed_interval(self, normalize=False):
257        """
258        Observed data. Has a MultiIndex.
259        Number of deaths (int) per age class observed during an interval of length INTERVAL_RATE.
260        columns.name == age_class (int)
261        index.names == ["interval", "cause_of_death"] (int, str)
262        """
263        # TODO think about position of axes
264        table = (
265            pd.concat({causeofdeath: self._read_df(f"age_at_{causeofdeath}") for causeofdeath in VALID_CAUSES_OF_DEATH})
266            .swaplevel()
267            .sort_index(level=0)
268        )
269        table.index.names = ["interval", "cause_of_death"]
270        table.columns.names = ["age_class"]
271        table.columns = table.columns.astype(int)
272        return table
273
274    #######################
275    # TABLES : derivative #
276    #######################
277
278    def get_surv_observed_interval(self):
279        # TODO this is not accurate; this assumes that the population is in an equilibrium, or it only works if the life table is sampling across a long period
280        lt = self.get_life_table_observed_interval()
281        lt = lt.pct_change(axis=1).shift(-1, axis=1).add(1).replace(np.inf, 1)
282        return lt
283
284    def get_fert_observed_interval(self):
285        lt = self.get_life_table_observed_interval()
286        bt = self.get_birth_table_observed_interval()
287        return bt / lt
288
289    ##########
290    # BASICS #
291    ##########
292
293    # TODO add better column and index names
294
295    def get_genotypes_intrinsic_snapshot(self, record_index):
296        """
297        columns .. bit index
298        index .. individual index
299        value .. True or False
300        """
301        # TODO let index denote the step at which the snapshot was taken
302        return self._read_snapshot("genotypes", record_index=record_index)
303
304    def get_phenotype_intrinsic_snapshot(self, trait, record_index):
305        """
306        columns .. phenotypic trait index
307        index .. individual index
308        value .. phenotypic trait value
309        """
310        # TODO organize by trait
311        # TODO let index denote the step at which the snapshot was taken
312        df = self._read_snapshot("phenotypes", record_index=record_index)
313        # df.columns = df.columns.str.split("_")
314        return df
315
316    def get_demography_observed_snapshot(self, record_index):
317        """
318        columns .. ages, births, birthdays, generations, sizes, sexes
319        index .. individual index
320        """
321        # TODO let index denote the step at which the snapshot was taken
322        return self._read_snapshot("demography", record_index=record_index)
323
324    def get_genotypes_intrinsic_interval(self, reload=True):
325        """
326        columns .. bit index
327        index .. record index
328        value .. mean bit value
329        """
330        # TODO check that they exist
331        df = pd.read_csv(self.get_path("genotypes"), header=[0, 1], index_col=None)
332        df.index = df.index.astype(int)
333        df.columns = df.columns.set_levels([df.columns.levels[0].astype(int), df.columns.levels[1].astype(int)])
334        df.index.names = ["interval"]
335        df.columns.names = ["bit_index", "ploidy"]
336        return df
337
338    def get_phenotype_intrinsic_interval(self, trait, reload=True):
339        """
340        columns .. age
341        index .. record index
342        value .. median phenotypic trait value
343        """
344        # TODO check that they exist
345        df = pd.read_csv(self.get_path("phenotypes"), header=[0, 1])
346        df.index.names = ["interval"]
347        df.index = df.index.astype(int)
348        df.columns.names = ["trait", "age_class"]
349        # TODO age_class is str
350        return df.xs(trait, axis=1)
351
352    def get_survival_analysis_TE_observed_interval(self, record_index):
353        """
354        columns .. T, E
355        index .. individual
356        value .. age at event, event (1 .. died, 0 .. alive)
357        """
358        # TODO error with T and E in the record; they are being appended on top
359        assert record_index < len(self.get_path("te")), "Index out of range"
360        data = pd.read_csv(self.get_path("te")[record_index], header=0)
361        data.index.names = ["individual"]
362        return data
363
364    def get_population_size_before_reproduction(self):
365        data = pd.read_csv(self.get_path("popsize_before_reproduction"), header=None)
366        data.index.names = ["steps"]
367        data.columns = ["popsize"]
368        return data
369
370    def get_population_size_after_reproduction(self):
371        data = pd.read_csv(self.get_path("popsize_after_reproduction"), header=None)
372        data.index.names = ["steps"]
373        data.columns = ["popsize"]
374        return data
375
376    def get_egg_number_after_reproduction(self):
377        data = pd.read_csv(self.get_path("eggnum_after_reproduction"), header=None)
378        data.index.names = ["steps"]
379        data.columns = ["number"]
380        return data
381
382    def get_resource_amount_before_scavenging(self):
383        data = pd.read_csv(self.get_path("resources_before_scavenging"), header=None)
384        data.index.names = ["steps"]
385        data.columns = ["resources"]
386        return data
387
388    def get_resource_amount_after_scavenging(self):
389        data = pd.read_csv(self.get_path("resources_after_scavenging"), header=None)
390        data.index.names = ["steps"]
391        data.columns = ["resources"]
392        return data
393
394    ###############
395    # DERIVATIVES #
396    ###############
397
398    def get_lifetime_reproduction(self):
399        survivorship = self.get_surv_observed_interval().cumprod(1)
400        fertility = self.get_fert_observed_interval()
401        return (survivorship * fertility).sum(axis=1)
402
403    def get_average_age_at_reproduction(self):
404        bt = self.get_birth_table_observed_interval()
405        n_offspring = bt.sum(1)
406        average_age_at_reproduction = (bt * bt.columns).sum(1) / n_offspring
407        return average_age_at_reproduction
408
409    #############
410    # UTILITIES #
411    #############
412
413    def _file_exists(self, stem):
414        if self.paths is None:
415            self.set_paths()
416        return stem in self.paths
417
418    def has_ticker_stopped(self):
419        return self.get_ticker().has_stopped()
420
421    def _read_df(self, stem, reload=True):
422        file_read = stem in self.data
423        # TODO Read also files that are not .csv
424
425        if not self._file_exists(stem):
426            logging.error(f"File {self.get_path(stem)} des not exist.")
427        elif (not file_read) or reload:
428            self.data[stem] = pd.read_csv(self.get_path(stem), header=0)
429
430        return self.data.get(stem, pd.DataFrame())
431
432    @staticmethod
433    def _read_json(path):
434        if not path.exists():
435            logging.warning(f"'{path}' does not exist.")
436            return None
437        with open(path, "r") as file_:
438            return json.load(file_)
439
440    def _read_snapshot(self, record_type, record_index):
441        assert record_type in self.get_path("snapshots"), f"No records of '{record_type}' can be found in snapshots"
442        assert record_index < len(self.get_path("snapshots")[record_type]), "Index out of range"
443        return pd.read_feather(self.get_path("snapshots")[record_type][record_index])
444
445    def _read_pickle(self, record_index):
446        assert record_index < len(self.get_path("pickles")), "Index out of range"
447        return Population.load_pickle_from(self.get_path("pickles")[record_index])

Reads and reformats output files so they are available for internal and external use (prepare for export).

Container(basepath)
27    def __init__(self, basepath):
28        self.basepath = pathlib.Path(
29            basepath
30        ).absolute()  # If path to config file is /path/_.yml, then basepath is /path/_
31        self.name = self.basepath.stem
32        self.data = {}
33        # self.set_paths()
34        self.paths = None
35        self.ticker = None
basepath
name
data
paths
ticker
def set_paths(self):
37    def set_paths(self):
38        # TODO smarter way of listing paths; you are capturing te files with number keys e.g. '6': ... /te/6.csv; that's silly
39        # TODO these are repeated elsewhere, e.g. path for ticker
40        self.paths = {
41            path.stem: path for path in self.basepath.glob("**/*") if path.is_file() and path.suffix == ".csv"
42        }
43        self.paths["log"] = self.basepath / "progress.log"
44        self.paths["ticker"] = self.basepath / "ticker.txt"
45        self.paths["simpleprogress"] = self.basepath / "simpleprogress.log"
46        self.paths["output_summary"] = self.basepath / "output_summary.json"
47        self.paths["input_summary"] = self.basepath / "input_summary.json"
48        self.paths["envdriftmap"] = self.basepath / "envdriftmap.csv"
49        self.paths["snapshots"] = {}
50        for kind in ("demography", "phenotypes", "genotypes"):
51            self.paths["snapshots"][kind] = sorted(
52                (self.basepath / "snapshots" / kind).glob("*"),
53                key=lambda path: int(path.stem),
54            )
55        self.paths["pickles"] = sorted(
56            (self.basepath / "pickles").glob("*"),
57            key=lambda path: int(path.stem),
58        )
59        # self.paths["te"] = sorted(
60        #     (self.basepath / "te").glob("*"),
61        #     key=lambda path: int(path.stem),
62        # )
63        self.paths["popsize_before_reproduction"] = self.basepath / "popsize_before_reproduction.csv"
64        self.paths["popsize_after_reproduction"] = self.basepath / "popsize_after_reproduction.csv"
65        self.paths["eggnum_after_reproduction"] = self.basepath / "eggnum_after_reproduction.csv"
66
67        if not self.paths["log"].is_file():
68            logging.error(f"No AEGIS log found at path {self.paths['log']}.")
def get_paths(self):
70    def get_paths(self):
71        if self.paths is None:
72            self.set_paths()
73        return self.paths
def get_path(self, name):
75    def get_path(self, name):
76        if self.paths is None:
77            self.set_paths()
78        return self.paths[name]
def get_record_structure(self):
80    def get_record_structure(self):
81        # TODO
82        return
def report(self):
84    def report(self):
85        """Report present and missing files"""
86        # TODO
87        return

Report present and missing files

def export(self):
89    def export(self):
90        """Export all primary data from the container using general formats"""
91        # TODO
92        return

Export all primary data from the container using general formats

@staticmethod
def stop_process(pid, kind_of_process):
 94    @staticmethod
 95    def stop_process(pid, kind_of_process):
 96        try:
 97            logging.info(f"Terminating {kind_of_process} process with PID {pid}...")
 98            process = psutil.Process(pid)
 99            process.terminate()  # or process.kill()
100            process.wait()  # Optional: Wait for the process to be fully terminated
101            logging.info(f"Process with PID {pid} terminated successfully.")
102        except psutil.NoSuchProcess:
103            logging.warning(f"No process found with PID {pid}.")
104        except psutil.AccessDenied:
105            logging.warning(f"Access denied when trying to terminate the process with PID {pid}.")
106        except Exception as e:
107            logging.error(f"An error occurred: {e}")
def terminate(self):
109    def terminate(self):
110        pid = self.get_input_summary()["pid"]
111        assert pid is not None
112        self.stop_process(pid, "simulation")
113        tpid = self.get_input_summary()["ticker_pid"]
114        assert tpid is not None
115        self.stop_process(tpid, "ticker")
def get_log(self, reload=True):
121    def get_log(self, reload=True):
122        if ("log" not in self.data) or reload:
123            df = pd.read_csv(self.get_path("log"), sep="|")
124            df.columns = [x.strip() for x in df.columns]
125
126            def dhm_inverse(dhm):
127                nums = dhm.replace("`", ":").split(":")
128                return int(nums[0]) * 24 * 60 + int(nums[1]) * 60 + int(nums[2])
129
130            # TODO resolve deprecated function
131            try:
132                df[["ETA", "t1M", "runtime"]].map(dhm_inverse)
133            except:
134                df[["ETA", "t1M", "runtime"]].applymap(dhm_inverse)
135            self.data["log"] = df
136        return self.data["log"]
def get_simple_log(self):
138    def get_simple_log(self):
139        try:
140            with open(self.get_path("simpleprogress"), "r") as file_:
141                text = file_.read()
142                step, steps_per_simulation = text.split("/")
143                return int(step), int(steps_per_simulation)
144        except:
145            logging.error(f"No simpleprogress.log found at {self.get_path('simpleprogress')}")
def get_ticker(self):
147    def get_ticker(self):
148        if self.ticker is None:
149            TICKER_RATE = self.get_final_config()["TICKER_RATE"]
150            self.ticker = Ticker(TICKER_RATE=TICKER_RATE, odir=self.get_path("ticker").parent)
151        return self.ticker
def get_config(self):
153    def get_config(self):
154        if "config" not in self.data:
155            path = self.basepath.parent / f"{self.basepath}.yml"
156            with open(path, "r") as file_:
157                custom_config = yaml.safe_load(file_)
158            # default_config = get_default_parameters()
159            if custom_config is None:
160                custom_config = {}
161            # self.data["config"] = {**default_config, **custom_config}
162            self.data["config"] = custom_config
163        return self.data["config"]
def get_final_config(self):
165    def get_final_config(self):
166        if "final_config" not in self.data:
167            path = self.basepath / "final_config.yml"
168            with open(path, "r") as file_:
169                final_config = yaml.safe_load(file_)
170            if final_config is None:
171                final_config = {}
172            self.data["final_config"] = final_config
173        return self.data["final_config"]
def get_generations_until_interval(self):
175    def get_generations_until_interval(self):
176        """Return Series of number of generations simulated up until interval i"""
177        # TODO beware that snapshots are not timed linearly; there is a bunch of snapshots at the end of the simulation
178        aar = self.get_average_age_at_reproduction()
179        aar.iloc[0] = np.inf  # No time has passed, so no generations yet
180        IR = self.get_final_config()["INTERVAL_RATE"]
181        aar = aar.pipe(lambda s: IR / s).cumsum()
182        return aar

Return Series of number of generations simulated up until interval i

def get_output_summary(self) -> Union[dict, None]:
184    def get_output_summary(self) -> Union[dict, None]:
185        path = self.get_path("output_summary")
186        if path.exists():
187            return self._read_json(path)
188        return {}
def get_input_summary(self):
190    def get_input_summary(self):
191        return self._read_json(self.get_path("input_summary"))
def get_envidriftmap(self):
193    def get_envidriftmap(self):
194        path = self.get_path("envdriftmap")
195        if path.exists():
196            return pd.read_csv(self.get_path("envdriftmap"), header=None)
197        return None
def get_phenomap(self):
199    def get_phenomap(self):
200        path = self.get_path("phenomap")
201        if path.exists():
202            return pd.read_csv(self.get_path("phenomap"))
203        return None
def get_birth_table_observed_interval(self, normalize=False):
209    def get_birth_table_observed_interval(self, normalize=False):
210        """
211        Observed data.
212        Number of births (int) per parental age during an interval of length INTERVAL_RATE.
213        columns.name == parental_age (int)
214        index.name == interval (int)
215        """
216        table = self._read_df("age_at_birth")
217        if normalize:
218            table = table.div(table.sum(1), axis=0)
219        table.index.names = ["interval"]
220        table.columns.names = ["parental_age"]
221        table.columns = table.columns.astype(int)
222        return table

Observed data. Number of births (int) per parental age during an interval of length INTERVAL_RATE. columns.name == parental_age (int) index.name == interval (int)

def get_life_table_observed_interval(self, normalize=False):
224    def get_life_table_observed_interval(self, normalize=False):
225        """
226        Observed data.
227        Number of individuals (int) per age class observed during an interval of length INTERVAL_RATE.
228        columns.name == age_class (int)
229        index.name == interval (int)
230        """
231        table = self._read_df("additive_age_structure")
232        table.index.names = ["interval"]
233        table.columns.names = ["age_class"]
234        table.columns = table.columns.astype(int)
235        # NOTE normalize by sum
236        if normalize:
237            table = table.div(table.sum(1), axis=0)
238        return table

Observed data. Number of individuals (int) per age class observed during an interval of length INTERVAL_RATE. columns.name == age_class (int) index.name == interval (int)

def get_life_table_observed_snapshot(self, record_index: int, normalize=False):
240    def get_life_table_observed_snapshot(self, record_index: int, normalize=False):
241        """
242        Observed data. Series.
243        Number of individuals (int) per age class observed at some simulation step captured by the record of index record_index.
244        name == count
245        index.name == age_class
246        """
247        AGE_LIMIT = self.get_final_config()["AGE_LIMIT"]
248        table = (
249            self.get_demography_observed_snapshot(record_index)
250            .ages.value_counts()
251            .reindex(range(AGE_LIMIT), fill_value=0)
252        )
253        table.index.names = ["age_class"]
254        return table

Observed data. Series. Number of individuals (int) per age class observed at some simulation step captured by the record of index record_index. name == count index.name == age_class

def get_death_table_observed_interval(self, normalize=False):
256    def get_death_table_observed_interval(self, normalize=False):
257        """
258        Observed data. Has a MultiIndex.
259        Number of deaths (int) per age class observed during an interval of length INTERVAL_RATE.
260        columns.name == age_class (int)
261        index.names == ["interval", "cause_of_death"] (int, str)
262        """
263        # TODO think about position of axes
264        table = (
265            pd.concat({causeofdeath: self._read_df(f"age_at_{causeofdeath}") for causeofdeath in VALID_CAUSES_OF_DEATH})
266            .swaplevel()
267            .sort_index(level=0)
268        )
269        table.index.names = ["interval", "cause_of_death"]
270        table.columns.names = ["age_class"]
271        table.columns = table.columns.astype(int)
272        return table

Observed data. Has a MultiIndex. Number of deaths (int) per age class observed during an interval of length INTERVAL_RATE. columns.name == age_class (int) index.names == ["interval", "cause_of_death"] (int, str)

def get_surv_observed_interval(self):
278    def get_surv_observed_interval(self):
279        # TODO this is not accurate; this assumes that the population is in an equilibrium, or it only works if the life table is sampling across a long period
280        lt = self.get_life_table_observed_interval()
281        lt = lt.pct_change(axis=1).shift(-1, axis=1).add(1).replace(np.inf, 1)
282        return lt
def get_fert_observed_interval(self):
284    def get_fert_observed_interval(self):
285        lt = self.get_life_table_observed_interval()
286        bt = self.get_birth_table_observed_interval()
287        return bt / lt
def get_genotypes_intrinsic_snapshot(self, record_index):
295    def get_genotypes_intrinsic_snapshot(self, record_index):
296        """
297        columns .. bit index
298        index .. individual index
299        value .. True or False
300        """
301        # TODO let index denote the step at which the snapshot was taken
302        return self._read_snapshot("genotypes", record_index=record_index)

columns .. bit index index .. individual index value .. True or False

def get_phenotype_intrinsic_snapshot(self, trait, record_index):
304    def get_phenotype_intrinsic_snapshot(self, trait, record_index):
305        """
306        columns .. phenotypic trait index
307        index .. individual index
308        value .. phenotypic trait value
309        """
310        # TODO organize by trait
311        # TODO let index denote the step at which the snapshot was taken
312        df = self._read_snapshot("phenotypes", record_index=record_index)
313        # df.columns = df.columns.str.split("_")
314        return df

columns .. phenotypic trait index index .. individual index value .. phenotypic trait value

def get_demography_observed_snapshot(self, record_index):
316    def get_demography_observed_snapshot(self, record_index):
317        """
318        columns .. ages, births, birthdays, generations, sizes, sexes
319        index .. individual index
320        """
321        # TODO let index denote the step at which the snapshot was taken
322        return self._read_snapshot("demography", record_index=record_index)

columns .. ages, births, birthdays, generations, sizes, sexes index .. individual index

def get_genotypes_intrinsic_interval(self, reload=True):
324    def get_genotypes_intrinsic_interval(self, reload=True):
325        """
326        columns .. bit index
327        index .. record index
328        value .. mean bit value
329        """
330        # TODO check that they exist
331        df = pd.read_csv(self.get_path("genotypes"), header=[0, 1], index_col=None)
332        df.index = df.index.astype(int)
333        df.columns = df.columns.set_levels([df.columns.levels[0].astype(int), df.columns.levels[1].astype(int)])
334        df.index.names = ["interval"]
335        df.columns.names = ["bit_index", "ploidy"]
336        return df

columns .. bit index index .. record index value .. mean bit value

def get_phenotype_intrinsic_interval(self, trait, reload=True):
338    def get_phenotype_intrinsic_interval(self, trait, reload=True):
339        """
340        columns .. age
341        index .. record index
342        value .. median phenotypic trait value
343        """
344        # TODO check that they exist
345        df = pd.read_csv(self.get_path("phenotypes"), header=[0, 1])
346        df.index.names = ["interval"]
347        df.index = df.index.astype(int)
348        df.columns.names = ["trait", "age_class"]
349        # TODO age_class is str
350        return df.xs(trait, axis=1)

columns .. age index .. record index value .. median phenotypic trait value

def get_survival_analysis_TE_observed_interval(self, record_index):
352    def get_survival_analysis_TE_observed_interval(self, record_index):
353        """
354        columns .. T, E
355        index .. individual
356        value .. age at event, event (1 .. died, 0 .. alive)
357        """
358        # TODO error with T and E in the record; they are being appended on top
359        assert record_index < len(self.get_path("te")), "Index out of range"
360        data = pd.read_csv(self.get_path("te")[record_index], header=0)
361        data.index.names = ["individual"]
362        return data

columns .. T, E index .. individual value .. age at event, event (1 .. died, 0 .. alive)

def get_population_size_before_reproduction(self):
364    def get_population_size_before_reproduction(self):
365        data = pd.read_csv(self.get_path("popsize_before_reproduction"), header=None)
366        data.index.names = ["steps"]
367        data.columns = ["popsize"]
368        return data
def get_population_size_after_reproduction(self):
370    def get_population_size_after_reproduction(self):
371        data = pd.read_csv(self.get_path("popsize_after_reproduction"), header=None)
372        data.index.names = ["steps"]
373        data.columns = ["popsize"]
374        return data
def get_egg_number_after_reproduction(self):
376    def get_egg_number_after_reproduction(self):
377        data = pd.read_csv(self.get_path("eggnum_after_reproduction"), header=None)
378        data.index.names = ["steps"]
379        data.columns = ["number"]
380        return data
def get_resource_amount_before_scavenging(self):
382    def get_resource_amount_before_scavenging(self):
383        data = pd.read_csv(self.get_path("resources_before_scavenging"), header=None)
384        data.index.names = ["steps"]
385        data.columns = ["resources"]
386        return data
def get_resource_amount_after_scavenging(self):
388    def get_resource_amount_after_scavenging(self):
389        data = pd.read_csv(self.get_path("resources_after_scavenging"), header=None)
390        data.index.names = ["steps"]
391        data.columns = ["resources"]
392        return data
def get_lifetime_reproduction(self):
398    def get_lifetime_reproduction(self):
399        survivorship = self.get_surv_observed_interval().cumprod(1)
400        fertility = self.get_fert_observed_interval()
401        return (survivorship * fertility).sum(axis=1)
def get_average_age_at_reproduction(self):
403    def get_average_age_at_reproduction(self):
404        bt = self.get_birth_table_observed_interval()
405        n_offspring = bt.sum(1)
406        average_age_at_reproduction = (bt * bt.columns).sum(1) / n_offspring
407        return average_age_at_reproduction
def has_ticker_stopped(self):
418    def has_ticker_stopped(self):
419        return self.get_ticker().has_stopped()