Source code for straindesign.strainDesignProblem

#!/usr/bin/env python3
#
# Copyright 2022 Max Planck Insitute Magdeburg
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
#
#
"""Classes and functions for the construction of strain design MILPs

This module contains functions that help construct mixed-integer linear problems,
mainly functions that facilitate the construction of LP and Farkas dual
problems from linear problems of the type A_ineq*x<=b_ineq, A_eq*x<=b_eq, lb<=x<=ub.
The functions also help keeping track of the relationship of constraints and variables
and their individual counterparts in dual problems, which is essential when simulating 
knockouts in dual problems. Most of the time, the sparse datatype is used to store and
edit matrices for improved speed and memory."""

import numpy as np
from scipy import sparse
from cobra.util import create_stoichiometric_matrix
from cobra import Model, Configuration
from typing import List, Tuple
from straindesign import SDModule, IndicatorConstraints, lineqlist2mat, linexprdict2mat, MILP_LP, SDPool, \
                         avail_solvers, select_solver, remove_dummy_bounds, SDModule
from straindesign.names import *
import logging


[docs]class SDProblem:
    """Strain design MILP
    
    The construcor of this class translates a model and strain design modules into a mixed integer linear
    problem. This class, however, is the backbone of the strain design computation. Preprocessing steps 
    that enable gene, reaction and regulatory interventions, or network compression usually preceed the 
    construction of an SDProblem-object and are integrated in the function *compute_strain_designs*. 

    Args:
        model (cobra.Model):
            A metabolic model that is an instance of the cobra.Model class.
        
        sd_modules ((list of) straindesign.SDModule):
            Modules that specify the strain design problem, e.g., protected or suppressed flux states for 
            MCS strain design or inner and outer objective functions for OptKnock. See description
            of SDModule for more information on how to set up modules.
            
        ko_cost (optional (dict)): (Default: None)
            A dictionary of reaction identifiers and their associated knockout costs. If not specified, all reactions
            are treated as knockout candidates, equivalent to ko_cost = {'r1':1, 'r2':1, ...}. If a subset of reactions
            is listed in the dict, all other are not considered as knockout candidates.
            
        ki_cost (optional (dict)): (Default: None)
            A dictionary of reaction identifiers and their associated costs for addition. If not specified, all reactions
            are treated as knockout candidates. Reaction addition candidates must be present in the original model with
            the intended flux boundaries **after** insertion. Additions are treated adversely to knockouts, meaning that
            their exclusion from the network is not associated with any cost while their presence entails intervention costs.
            
        max_cost (optional (int)): (Default: inf): 
            The maximum cost threshold for interventions. Every possible intervention is associated with a
            cost value (1, by default). Strain designs cannot exceed the max_cost threshold. Individual
            intervention cost factors may be defined through ki_cost and ko_cost.
            
        solver (optional (str)): (Default: same as defined in model / COBRApy)
            The solver that should be used for preparing and carrying out the strain design computation.
            Allowed values are 'cplex', 'gurobi', 'scip' and 'glpk'.
            
        M (optional (int)): (Default: None)
            If this value is specified (and non-zero, not None), the computation uses the big-M 
            method instead of indicator constraints. Since GLPK does not support indicator constraints it uses
            the big-M method by default (with COBRA standard M=1000). M should be chosen 'sufficiently large' 
            to avoid computational artifacts and 'sufficiently small' to avoid numerical issues.
            
        essential_kis (optional (set)):
            A set of reactions that are marked as addable and that are essential for at least one of the
            strain design modules. Providing such "essential knock-ins" may speed up the strain design computation.
            
    Returns:
        (SDProblem):
            An instance of SDProblem containing the strain design MILP
    """

    def __init__(self, model: Model, sd_modules: List[SDModule], *args, **kwargs):
        allowed_keys = {KOCOST, KICOST, SOLVER, MAX_COST, 'M', 'essential_kis'}
        # set all keys passed in kwargs
        for key, value in dict(kwargs).items():
            if key in allowed_keys:
                setattr(self, key, value)
            else:
                raise Exception("Key " + key + " is not supported.")
        # set all remaining keys to None
        for key in allowed_keys:
            if key not in dict(kwargs).keys():
                setattr(self, key, None)

        if "SD_Module" in str(type(sd_modules)):
            sd_modules = [sd_modules]

        if self.solver is None:
            if len(avail_solvers) > 0:
                self.solver = list(avail_solvers)[0]
            else:
                raise Exception('No solver available. Please ensure that one of the following '\
                    'solvers is avaialable in your Python environment: CPLEX, Gurobi, SCIP, GLPK')
        self.solver = select_solver(self.solver, model)
        cobra_conf = Configuration()
        bound_thres = max((abs(cobra_conf.lower_bound), abs(cobra_conf.upper_bound)))
        if self.M is None and self.solver == 'glpk':
            logging.warning(
                'GLPK only supports strain design computation with the bigM method. Using cobra bound: '+str(bound_thres)+\
                ' as M.')
            self.M = bound_thres
        elif self.M is None:
            self.M = np.inf
        # the matrices in sd_modules, ko_cost and ki_cost should be numpy.array or scipy.sparse (csr, csc, lil) format
        self.model = model
        self.sd_modules = sd_modules
        reac_ids = model.reactions.list_attr("id")
        numr = len(model.reactions)
        # Create vectors for ko_cost, ki_cost, inverted bool-vars and non-targetable bools
        # If no knockable reactions are assigned, assume all are KO-able.
        # Generally, KIs overwrite KOs
        if self.ko_cost is None:
            self.ko_cost = {rid: 1.0 for rid in reac_ids}
        if self.ki_cost is None:
            self.ki_cost = {}
        if self.essential_kis is None:
            self.essential_kis = set()
        self.ko_cost = [float(self.ko_cost.get(key)) if (key in self.ko_cost.keys()) else np.nan for key in reac_ids]
        self.ki_cost = [float(self.ki_cost.get(key)) if (key in self.ki_cost.keys()) else np.nan for key in reac_ids]
        self.ko_cost = [self.ko_cost[i] if np.isnan(self.ki_cost[i]) else np.nan for i in range(numr)]
        self.num_z = numr
        self.cost = [i for i in self.ko_cost]
        for i in [i for i, x in enumerate(self.ki_cost) if not np.isnan(x)]:
            self.cost[i] = self.ki_cost[i]
        self.z_inverted = [not np.isnan(x) for x in self.ki_cost]
        self.z_non_targetable = [np.isnan(x) for x in self.cost]
        for i in [i for i, x in enumerate(self.cost) if np.isnan(x)]:
            self.cost[i] = 0.0
        # Prepare top 3 lines of MILP (sum of weighted interventions below (0) and above (1) threshold) and objective function (2)
        self.idx_row_maxcost = 0
        self.idx_row_mincost = 1
        self.idx_row_obj = 2
        self.A_ineq = sparse.csr_matrix([[-i for i in self.cost], self.cost, [0 for _ in range(self.num_z)]])
        if self.max_cost is None:
            self.b_ineq = [0.0, float(np.sum(np.abs(self.cost))), np.inf]
        else:
            self.b_ineq = [0.0, float(self.max_cost), np.inf]
        self.z_map_constr_ineq = sparse.csc_matrix((numr, 3))
        self.lb = [1.0 if r in self.essential_kis else 0.0 for r in model.reactions]
        self.ub = [1.0 - float(i) for i in self.z_non_targetable]
        self.idx_z = [i for i in range(0, numr)]
        self.c = [0.0] * numr
        # Initialize also empty equality matrix
        self.A_eq = sparse.csc_matrix((0, numr))
        self.b_eq = []
        self.z_map_constr_eq = sparse.csc_matrix((numr, 0))
        self.num_modules = 0
        self.indic_constr = []  # Add instances of the class 'Indicator_constraint' later
        # Initialize association between z and variables and variables
        self.z_map_vars = sparse.csc_matrix((numr, numr))
        # replace bounds with inf if above a cobra bound threshold
        remove_dummy_bounds(self.model)
        logging.info('Constructing strain design MILP for solver: ' + self.solver + '.')
        for i in range(len(sd_modules)):
            self.addModule(sd_modules[i])

        # Assign knock-ins/outs correctly by taking into account z_inverted
        # invert *(-1) rows in z_map_constr_eq, z_map_constr_ineq, z_map_vars
        # where there are "knock-ins" / additions
        # make knock-in/out matrix
        z_kos_kis = [
            1 if (not self.z_non_targetable[i]) and (not self.z_inverted[i]) else -1 if self.z_inverted[i] else 0
            for i in range(0, self.num_z)
        ]
        z_kos_kis = sparse.diags(z_kos_kis)
        self.z_map_constr_ineq = z_kos_kis * self.z_map_constr_ineq
        self.z_map_constr_eq = z_kos_kis * self.z_map_constr_eq
        self.z_map_vars = z_kos_kis * self.z_map_vars

        # Save continous part of MILP for easy strain design validation
        cont_vars = [i for i in range(0, self.A_ineq.shape[1]) if not i in self.idx_z]
        self.cont_MILP = ContMILP(self.A_ineq[:, cont_vars], self.b_ineq.copy(), self.A_eq[:, cont_vars], self.b_eq.copy(),
                                  [self.lb[i] for i in cont_vars], [self.ub[i] for i in cont_vars], [self.c[i] for i in cont_vars],
                                  self.z_map_constr_ineq.tocoo(), self.z_map_constr_eq.tocoo(), self.z_map_vars[:, cont_vars].tocoo())

        # 4. Link LP module to z-variables
        self.link_z()

        # if there are only mcs modules, minimize the knockout costs,
        # otherwise use objective function(s) from modules
        if all([mod[MODULE_TYPE] in [PROTECT, SUPPRESS] for mod in sd_modules]):
            for i in self.idx_z:
                self.c[i] = self.cost[i]
            self.is_mcs_computation = True
        else:
            self.is_mcs_computation = False
            for i in self.idx_z:
                self.c[i] = 0.0
            self.A_ineq = self.A_ineq.tolil()
            self.A_ineq[2] = sparse.lil_matrix(self.c)  # set objective
            self.A_ineq = self.A_ineq.tocsr()

        # backup objective function
        self.c_bu = [float(i) for i in self.c.copy()]

        # # for debugging
        # A = sparse.vstack(( self.A_ineq,sparse.csr_matrix([np.nan]*len(self.c)),\
        #                     self.A_eq,sparse.csr_matrix([np.nan]*len(self.c)),\
        #                     self.indic_constr.A,sparse.csr_matrix([np.nan]*len(self.c)),\
        #                     sparse.csr_matrix(self.ub),sparse.csr_matrix(self.lb)))
        # b = self.b_ineq + [np.nan] + self.b_eq + [np.nan] + self.indic_constr.b + [np.nan, np.nan, np.nan]
        # Ab = sparse.hstack((A,sparse.csr_matrix([np.nan]*len(b)).transpose(),sparse.csr_matrix(b).transpose()))
        # np.savetxt("Ab_py.tsv", Ab.todense(), delimiter='\t')
        self.vtype = 'B' * self.num_z + 'C' * (self.z_map_vars.shape[1] - self.num_z)

[docs]    def addModule(self, sd_module):
        """Generate module LP and z-linking-matrix for each module and add them to the strain design MILP
        
        Args:
            sd_module (straindesign.SDModule):
                Modules to describe strain design problems like protected or suppressed flux states for 
                MCS strain design or inner and outer objective functions for OptKnock. See description
                of SDModule for more information on how to set up modules.
        """
        self.num_modules += 1
        z_map_constr_ineq_i = []
        z_map_constr_eq_i = []
        z_map_vars_i = []
        # 1. Translate (in)equalities into matrix form
        V_ineq, v_ineq, V_eq, v_eq = lineqlist2mat(sd_module[CONSTRAINTS], self.model.reactions.list_attr('id'))
        # 2. Construct LP for module
        if sd_module[MODULE_TYPE] in [PROTECT, SUPPRESS] and sd_module[INNER_OBJECTIVE] is None:
            # Classical MCS
            A_ineq_p, b_ineq_p, A_eq_p, b_eq_p, lb_p, ub_p, c_p, z_map_constr_ineq_p, z_map_constr_eq_p, z_map_vars_p \
                = build_primal_from_cbm(self.model, V_ineq, v_ineq, V_eq, v_eq)
        elif sd_module[MODULE_TYPE] in [PROTECT, SUPPRESS, OPTKNOCK, OPTCOUPLE]:
            c_in = linexprdict2mat(sd_module[INNER_OBJECTIVE], self.model.reactions.list_attr('id'))
            # by default, assume maximization of the inner objective
            if not hasattr(sd_module,INNER_OPT_SENSE) or sd_module[INNER_OPT_SENSE] is None or \
                sd_module[INNER_OPT_SENSE] not in [MINIMIZE, MAXIMIZE] or sd_module[INNER_OPT_SENSE] == MAXIMIZE:
                c_in = -c_in
            c_in = c_in.toarray()[0].tolist()
            # 1. build primal w/ desired constraint (build_primal_from_cbm) - also store variable c
            A_ineq_v, b_ineq_v, A_eq_v, b_eq_v, lb_v, ub_v, c_v, z_map_constr_ineq_v, z_map_constr_eq_v, z_map_vars_v \
                = build_primal_from_cbm(self.model, V_ineq, v_ineq, V_eq, v_eq, c_in)
            # 2. build primal w/o desired constraint (build_primal_from_cbm) - store c_inner
            A_ineq_inner, b_ineq_inner, A_eq_inner, b_eq_inner, lb_inner, ub_inner, c_inner, z_map_constr_ineq_inner, z_map_constr_eq_inner, z_map_vars_inner \
                = build_primal_from_cbm(self.model, V_ineq=None, v_ineq=None, V_eq=None, v_eq=None, c=c_in)
            # 3. build dual from primal w/o desired constraint (build_dual w/o the farkas-option) - store c_inner_dual
            A_ineq_dual, b_ineq_dual, A_eq_dual, b_eq_dual, lb_dual, ub_dual, c_inner_dual, z_map_constr_ineq_dual, z_map_constr_eq_dual, z_map_vars_dual \
                = LP_dualize(A_ineq_inner, b_ineq_inner, A_eq_inner, b_eq_inner, lb_inner, ub_inner, c_inner, z_map_constr_ineq_inner, z_map_constr_eq_inner, z_map_vars_inner)
            # 4. connect primal w/ undesired region and dual w/o undesired region (i.e. biomass) via c = c_inner.
            A_ineq_p = sparse.block_diag((A_ineq_v, A_ineq_dual)).tocsr()
            b_ineq_p = b_ineq_v + b_ineq_dual
            A_eq_p = sparse.vstack((sparse.block_diag((A_eq_v, A_eq_dual)), sparse.hstack((c_v, c_inner_dual)))).tocsr()
            b_eq_p = b_eq_v + b_eq_dual + [0.0]
            lb_p = lb_v + lb_dual
            ub_p = ub_v + ub_dual
            # 5. Update z-associations
            z_map_vars_p = sparse.hstack((z_map_vars_v, z_map_vars_dual))
            z_map_constr_ineq_p = sparse.hstack((z_map_constr_ineq_v, z_map_constr_ineq_dual))
            z_map_constr_eq_p = sparse.hstack((z_map_constr_eq_v, z_map_constr_eq_dual, sparse.csc_matrix((self.num_z, 1))))
        elif sd_module[MODULE_TYPE] == ROBUSTKNOCK:
            # RobustKnock has three layers, inner maximization and an outer min-max problem
            c_in = linexprdict2mat(sd_module[INNER_OBJECTIVE], self.model.reactions.list_attr('id'))
            # by default, assume maximization of the inner objective
            if not hasattr(sd_module,INNER_OPT_SENSE) or sd_module[INNER_OPT_SENSE] is None or \
                sd_module[INNER_OPT_SENSE] not in [MINIMIZE, MAXIMIZE] or sd_module[INNER_OPT_SENSE] == MAXIMIZE:
                c_in = -c_in
            c_in = c_in.toarray()[0].tolist()
            # 1. build primal of inner problem (build_primal_from_cbm) - also store variable c
            A_ineq_v, b_ineq_v, A_eq_v, b_eq_v, lb_v, ub_v, c_v, z_map_constr_ineq_v, z_map_constr_eq_v, z_map_vars_v \
                = build_primal_from_cbm(self.model, V_ineq=None, v_ineq=None, V_eq=None, v_eq=None, c=c_in)
            # 2. build primal of outer problem (base of inner outer problem) (build_primal_from_cbm) - with inner objective
            A_ineq_inner, b_ineq_inner, A_eq_inner, b_eq_inner, lb_inner, ub_inner, c_inner, z_map_constr_ineq_inner, z_map_constr_eq_inner, z_map_vars_inner \
                = build_primal_from_cbm(self.model,V_ineq=None, v_ineq=None, V_eq=None, v_eq=None, c=c_in)
            # 3. build primal of outer problem (outer outer problem) (build_primal_from_cbm) - store c_inner
            c_out = linexprdict2mat(sd_module[OUTER_OBJECTIVE], self.model.reactions.list_attr('id'))  # get outer objective
            if not hasattr(sd_module,OUTER_OPT_SENSE) or sd_module[OUTER_OPT_SENSE] is None or \
                sd_module[OUTER_OPT_SENSE] not in [MINIMIZE, MAXIMIZE] or sd_module[OUTER_OPT_SENSE] == MAXIMIZE:
                c_out = -c_out
            c_out = c_out.toarray()[0].tolist()
            A_ineq_r, b_ineq_r, A_eq_r, b_eq_r, lb_r, ub_r, c_r, z_map_constr_ineq_r, z_map_constr_eq_r, z_map_vars_r \
                = build_primal_from_cbm(self.model,V_ineq, v_ineq, V_eq, v_eq, [-c for c in c_out])
            # 4. build dual of innerst problem - store c_inner_dual
            A_ineq_dual, b_ineq_dual, A_eq_dual, b_eq_dual, lb_dual, ub_dual, c_inner_dual, z_map_constr_ineq_dual, z_map_constr_eq_dual, z_map_vars_dual \
                = LP_dualize(A_ineq_inner, b_ineq_inner, A_eq_inner, b_eq_inner, lb_inner, ub_inner, c_inner, z_map_constr_ineq_inner, z_map_constr_eq_inner, z_map_vars_inner)
            # 5. connect primal w/ undesired region and dual w/o undesired region (i.e. biomass) via c = c_inner.
            A_ineq_p = sparse.block_diag((A_ineq_v, A_ineq_dual)).tocsr()
            b_ineq_p = b_ineq_v + b_ineq_dual
            A_eq_p = sparse.vstack((sparse.block_diag((A_eq_v, A_eq_dual)), sparse.hstack((c_v, c_inner_dual)))).tocsr()
            b_eq_p = b_eq_v + b_eq_dual + [0.0]
            lb_p = lb_v + lb_dual
            ub_p = ub_v + ub_dual
            c_out_in_p = -sparse.csr_matrix(c_out)
            c_out_in_p.resize((1, A_ineq_p.shape[1]))
            c_out_in_p = c_out_in_p.toarray()[0].tolist()
            # 6. Update z-associations
            z_map_vars_p = sparse.hstack((z_map_vars_v, z_map_vars_dual))
            z_map_constr_ineq_p = sparse.hstack((z_map_constr_ineq_v, z_map_constr_ineq_dual))
            z_map_constr_eq_p = sparse.hstack((z_map_constr_eq_v, z_map_constr_eq_dual, sparse.csc_matrix((self.num_z, 1))))
            # 7. Dualize the joint inner problem
            A_ineq_dl_mmx, b_ineq_dl_mmx, A_eq_dl_mmx, b_eq_dl_mmx, lb_dl_mmx, ub_dl_mmx, c_dl_mmx, z_map_constr_ineq_dl_mmx, z_map_constr_eq_dl_mmx, z_map_vars_dl_mmx \
                = LP_dualize(A_ineq_p, b_ineq_p, A_eq_p, b_eq_p, lb_p, ub_p, c_out_in_p, z_map_constr_ineq_p, z_map_constr_eq_p, z_map_vars_p)
            # 8. Connect outer problem to the dualized combined inner problem to construct min-max problem.
            A_ineq_q = sparse.block_diag((A_ineq_r, A_ineq_dl_mmx)).tocsr()
            b_ineq_q = b_ineq_r + b_ineq_dl_mmx
            A_eq_q = sparse.vstack((sparse.block_diag((A_eq_r, A_eq_dl_mmx)), sparse.hstack((c_r, c_dl_mmx)))).tocsr()
            b_eq_q = b_eq_r + b_eq_dl_mmx + [0.0]
            lb_q = lb_r + lb_dl_mmx
            ub_q = ub_r + ub_dl_mmx
            # 9. Update z-associations
            z_map_vars_q = sparse.hstack((z_map_vars_r, z_map_vars_dl_mmx))
            z_map_constr_ineq_q = sparse.hstack((z_map_constr_ineq_r, z_map_constr_ineq_dl_mmx))
            z_map_constr_eq_q = sparse.hstack((z_map_constr_eq_r, z_map_constr_eq_dl_mmx, sparse.csc_matrix((self.num_z, 1))))
            # 10. reassign lb, ub where possible
            A_ineq_i, b_ineq_i, A_eq_i, b_eq_i, lb_i, ub_i, z_map_constr_ineq_i, z_map_constr_eq_i = reassign_lb_ub_from_ineq(
                A_ineq_q, b_ineq_q, A_eq_q, b_eq_q, lb_q, ub_q, z_map_constr_ineq_q, z_map_constr_eq_q, z_map_vars_q)
            z_map_vars_i = z_map_vars_q
            c_i = sparse.csr_matrix(c_out)
            c_i.resize((1, A_ineq_i.shape[1]))
            c_i = c_i.toarray()[0].tolist()
        if sd_module[MODULE_TYPE] == OPTCOUPLE:
            c_p = c_v + [0 for _ in c_inner_dual]
            # (continued from optknock)
            prod_eq = linexprdict2mat(sd_module[PROD_ID], self.model.reactions.list_attr('id'))
            # 6.  build primal system with no production - also store variable c
            A_ineq_r, b_ineq_r, A_eq_r, b_eq_r, lb_r, ub_r, c_r, z_map_constr_ineq_r, z_map_constr_eq_r, z_map_vars_r \
                = build_primal_from_cbm(self.model,V_ineq=None, v_ineq=None, V_eq=prod_eq, v_eq=[0], c=c_in)
            # 7. Dualize no-production system.
            A_ineq_r_dl, b_ineq_dl_r_dl, A_eq_dl_r_dl, b_eq_r_dl, lb_r_dl, ub_r_dl, c_r_dl, z_map_constr_ineq_r_dl, z_map_constr_eq_r_dl, z_map_vars_r_dl \
                = LP_dualize(A_ineq_r, b_ineq_r, A_eq_r, b_eq_r, lb_r, ub_r, c_r, z_map_constr_ineq_r, z_map_constr_eq_r, z_map_vars_r)
            # 8. Create no-production bi-level system.
            A_ineq_b = sparse.block_diag((A_ineq_r, A_ineq_r_dl), format='csr')
            b_ineq_b = b_ineq_r + b_ineq_dl_r_dl
            A_eq_b = sparse.vstack((sparse.block_diag((A_eq_r, A_eq_dl_r_dl)), sparse.hstack((c_r, c_r_dl))), format='csr')
            b_eq_b = b_eq_r + b_eq_r_dl + [0.0]
            lb_b = lb_r + lb_r_dl
            ub_b = ub_r + ub_r_dl
            c_b = c_r + [0.0] * len(c_r_dl)
            z_map_vars_b = sparse.hstack((z_map_vars_r, z_map_vars_r_dl))
            z_map_constr_ineq_b = sparse.hstack((z_map_constr_ineq_r, z_map_constr_ineq_r_dl))
            z_map_constr_eq_b = sparse.hstack((z_map_constr_eq_r, z_map_constr_eq_r_dl, sparse.csc_matrix((self.num_z, 1))))
            # 9. Connect optknock-problem to the no-production-bilevel system to construct the optcouple problem.
            A_ineq_q = sparse.block_diag((A_ineq_p, A_ineq_b), format='csr')
            b_ineq_q = b_ineq_p + b_ineq_b
            A_eq_q = sparse.block_diag((A_eq_p, A_eq_b), format='csr')
            b_eq_q = b_eq_p + b_eq_b
            lb_q = lb_p + lb_b
            ub_q = ub_p + ub_b
            z_map_vars_q = sparse.hstack((z_map_vars_p, z_map_vars_b))
            z_map_constr_ineq_q = sparse.hstack((z_map_constr_ineq_p, z_map_constr_ineq_b))
            z_map_constr_eq_q = sparse.hstack((z_map_constr_eq_p, z_map_constr_eq_b))
            # if minimum growth-coupling potential is specified, enforce it through inequality
            if MIN_GCP in sd_module:
                A_ineq_q = sparse.vstack((A_ineq_q, sparse.lil_matrix(c_p + [-c for c in c_b])), format='csr')
                b_ineq_q = b_ineq_q + [-sd_module[MIN_GCP]]
                z_map_constr_ineq_q = sparse.hstack((z_map_constr_ineq_q, sparse.csc_matrix((self.num_z, 1))))
            # 10. reassign lb, ub where possible
            A_ineq_i, b_ineq_i, A_eq_i, b_eq_i, lb_i, ub_i, z_map_constr_ineq_i, z_map_constr_eq_i = reassign_lb_ub_from_ineq(
                A_ineq_q, b_ineq_q, A_eq_q, b_eq_q, lb_q, ub_q, z_map_constr_ineq_q, z_map_constr_eq_q, z_map_vars_q)
            z_map_vars_i = z_map_vars_q
            # 11. objective: maximize distance between growth rate at production and no production
            c_i = c_p + [-c for c in c_b]

        # 3. Prepare module as undesired, desired or other
        if sd_module[MODULE_TYPE] == PROTECT:
            A_ineq_i, b_ineq_i, A_eq_i, b_eq_i, lb_i, ub_i, z_map_constr_ineq_i, z_map_constr_eq_i = reassign_lb_ub_from_ineq(
                A_ineq_p, b_ineq_p, A_eq_p, b_eq_p, lb_p, ub_p, z_map_constr_ineq_p, z_map_constr_eq_p, z_map_vars_p)
            z_map_vars_i = z_map_vars_p
            c_i = [0 for _ in range(A_ineq_i.shape[1])]
        elif sd_module[MODULE_TYPE] == SUPPRESS:
            A_ineq_i, b_ineq_i, A_eq_i, b_eq_i, lb_i, ub_i, z_map_constr_ineq_i, z_map_constr_eq_i, z_map_vars_i = farkas_dualize(
                A_ineq_p, b_ineq_p, A_eq_p, b_eq_p, lb_p, ub_p, z_map_constr_ineq_p, z_map_constr_eq_p, z_map_vars_p)
            c_i = [0 for _ in range(A_ineq_i.shape[1])]
        elif sd_module[MODULE_TYPE] == OPTKNOCK:
            A_ineq_i, b_ineq_i, A_eq_i, b_eq_i, lb_i, ub_i, z_map_constr_ineq_i, z_map_constr_eq_i = reassign_lb_ub_from_ineq(
                A_ineq_p, b_ineq_p, A_eq_p, b_eq_p, lb_p, ub_p, z_map_constr_ineq_p, z_map_constr_eq_p, z_map_vars_p)
            z_map_vars_i = z_map_vars_p
            # prepare outer objective
            c_i = linexprdict2mat(sd_module[OUTER_OBJECTIVE], self.model.reactions.list_attr('id'))
            c_i.resize((1, A_ineq_i.shape[1]))
            if OUTER_OPT_SENSE not in sd_module or sd_module[OUTER_OPT_SENSE] is None or \
                sd_module[OUTER_OPT_SENSE] not in [MINIMIZE, MAXIMIZE] or sd_module[OUTER_OPT_SENSE] == MAXIMIZE:
                c_i = -c_i
            c_i = c_i.toarray()[0].tolist()

        # 3. Add module to global MILP
        self.z_map_constr_ineq = sparse.hstack((self.z_map_constr_ineq, z_map_constr_ineq_i)).tocsc()
        self.z_map_constr_eq = sparse.hstack((self.z_map_constr_eq, z_map_constr_eq_i)).tocsc()
        self.z_map_vars = sparse.hstack((self.z_map_vars, z_map_vars_i)).tocsc()
        self.A_ineq = sparse.bmat([[self.A_ineq, None], [None, A_ineq_i]]).tocsr()
        self.b_ineq += b_ineq_i
        self.A_eq = sparse.bmat([[self.A_eq, None], [None, A_eq_i]]).tocsr()
        self.b_eq += b_eq_i
        self.c += c_i
        self.lb += lb_i
        self.ub += ub_i

[docs]    def link_z(self):
        """Connect binary intervention variables to variables and constraints of the strain design problem
        
        Function that uses the maps between intervention indicators z and variables and constraints
        of the linear strain design (in)equality system (self.z_map_constr_ineq, self.z_map_constr_eq and 
        self.z_map_vars) to set up the strain design MILP.
        
        MILP construction uses the following steps:
        
        (1) Translate equality-KOs/KIs to two inequality-KOs/KIs
        (2) Translate variable-KOs/KIs to inequality-KIs/KOs
        (3) Try to bound the problem with LPs
        (4) Use LP-determined bounds to link z-variables, where such bounds were found
        (5) Translate remaining inequalities back to equalities when possible and link z via indicator constraints.
        If necessary, the solver interface will translate them to big-M constraints.
        (6) Remove redundant equalities from static problem
        """

        # 1. Split knockable equality constraints into foward and reverse direction
        knockable_constr_eq = self.z_map_constr_eq.nonzero()[1]  # first array: z, second array: eq constr
        eq_constr_A = sparse.vstack((self.A_eq[knockable_constr_eq, :], -self.A_eq[knockable_constr_eq, :]))
        eq_constr_b = [self.b_eq[i] for i in knockable_constr_eq] + [-self.b_eq[i] for i in knockable_constr_eq]
        z_eq = self.z_map_constr_eq[:, tuple(knockable_constr_eq) * 2]
        # Add knockable inequalities to global A_ineq matrix
        self.A_ineq = sparse.vstack((self.A_ineq, eq_constr_A)).tocsr()
        self.b_ineq += eq_constr_b
        self.z_map_constr_ineq = sparse.hstack((self.z_map_constr_ineq, z_eq)).tocsc()
        # Remove knockable equalities from A_eq
        n_rows_eq = self.A_eq.shape[0]
        self.A_eq = self.A_eq[[False if i in knockable_constr_eq else True for i in range(0, n_rows_eq)]]
        self.b_eq = [self.b_eq[i] for i in range(0, len(self.b_eq)) if i not in knockable_constr_eq]
        self.z_map_constr_eq = self.z_map_constr_eq[:, [False if i in knockable_constr_eq else True for i in range(0, n_rows_eq)]]

        # 2. Translate all variable knockouts to inequality knockouts
        numvars = self.A_ineq.shape[1]
        knockable_vars = self.z_map_vars.nonzero()  # first array: z, second array: x
        knockable_vars_geq0 = [i for i in knockable_vars[1] if self.ub[i] > 0]
        knockable_vars_leq0 = [i for i in knockable_vars[1] if self.lb[i] < 0]
        ub_constr_A = sparse.csr_matrix(([1] * len(knockable_vars_geq0), (range(0, len(knockable_vars_geq0)), knockable_vars_geq0)),
                                        [len(knockable_vars_geq0), numvars])
        ub_constr_b = [0 for _ in knockable_vars_geq0]
        lb_constr_A = sparse.csr_matrix(([-1] * len(knockable_vars_leq0), (range(0, len(knockable_vars_leq0)), knockable_vars_leq0)),
                                        [len(knockable_vars_leq0), numvars])
        lb_constr_b = [0 for _ in knockable_vars_leq0]
        bnd_constr_A = sparse.vstack((ub_constr_A, lb_constr_A)).tocsr()
        bnd_constr_b = ub_constr_b + lb_constr_b
        var_kos = [knockable_vars[0][(knockable_vars[1] == i).nonzero()[0][0]] for i in knockable_vars_geq0 + knockable_vars_leq0]
        z_lb_ub = -self.z_map_vars[:, knockable_vars_geq0 + knockable_vars_leq0]
        # add constraints to main problem
        self.A_ineq = sparse.vstack((self.A_ineq, bnd_constr_A)).tocsr()
        self.b_ineq += bnd_constr_b
        self.z_map_constr_ineq = sparse.hstack((self.z_map_constr_ineq, z_lb_ub)).tocsc()

        # 3. Use LP to identify M-values for knockable constraints
        #    For this purpose, first construct a most relaxed LP-model (use all possible constraint-KOs, no possible var-KOs)
        knockable_constr_ineq = np.sort(self.z_map_constr_ineq.nonzero()[1])

        cont_vars = [False if i in self.idx_z else True for i in range(0, numvars)]
        M_A_ineq = self.A_ineq[[False if i in knockable_constr_ineq else True for i in range(0, self.A_ineq.shape[0])], :][:, cont_vars]
        M_b_ineq = [self.b_ineq[i] for i in range(0, self.A_ineq.shape[0]) if i not in knockable_constr_ineq]
        M_A_eq = self.A_eq[:, cont_vars]
        M_b_eq = self.b_eq.copy()
        M_lb = [self.lb[i] for i in np.nonzero(cont_vars)[0]]
        M_ub = [self.ub[i] for i in np.nonzero(cont_vars)[0]]
        # M_A contains a list of all knockable constraints. We need to maximize their value (M_A(i)*x) to get a good M
        # M_A(i)*x - z*M <= b
        # M*x <= b+z*M
        #     or
        # M_A(i)*x + z*M <= b + M
        # b is the right hand side value
        M_A = self.A_ineq[[True if i in knockable_constr_ineq else False for i in range(0, self.A_ineq.shape[0])], :][:, cont_vars]
        M_A = [(M_A[i, :])[0].toarray()[0] for i in range(M_A.shape[0])]
        M_b = [self.b_ineq[i] for i in range(0, self.A_ineq.shape[0]) if i in knockable_constr_ineq]

        processes = Configuration().processes
        num_Ms = len(M_A)
        processes = min(processes, num_Ms)

        max_Ax = [np.nan] * num_Ms

        # Dummy to check if optimization runs
        # worker_init(M_A,M_A_ineq,M_b_ineq,M_A_eq,M_b_eq,M_lb,M_ub,list(solvers.keys())[0])
        # worker_compute(1)

        logging.info('  Bounding MILP.')
        if processes > 1 and num_Ms > 1000:
            with SDPool(processes, initializer=worker_init,
                        initargs=(M_A, M_A_ineq, M_b_ineq, M_A_eq, M_b_eq, M_lb, M_ub, self.solver)) as pool:
                chunk_size = num_Ms // processes
                for i, value in pool.imap_unordered(worker_compute, range(num_Ms), chunksize=chunk_size):
                    max_Ax[i] = value
        else:
            worker_init(M_A, M_A_ineq, M_b_ineq, M_A_eq, M_b_eq, M_lb, M_ub, self.solver)
            for i in range(num_Ms):
                _, max_Ax[i] = worker_compute(i)

        # round Ms up to 5 digits
        Ms = [np.ceil(M * 1e5) / 1e5 if not np.isinf(M) else self.M for M in max_Ax]
        # fill up M-vector also for notknockable reactions
        Ms = [
            Ms[np.array([i == j
                         for j in knockable_constr_ineq]).nonzero()[0][0]] if i in knockable_constr_ineq else np.nan
            for i in range(self.A_ineq.shape[0])
        ]

        # 4. Link constraints to z-variables for available upper bounds
        self.z_map_constr_ineq = self.z_map_constr_ineq.tocsc()
        self.A_ineq = self.A_ineq.todok()
        # iterate through knockable constraints
        for row in range(self.A_ineq.shape[0]):
            if not np.isinf(Ms[row]) and not np.isnan(Ms[row]):  # if there is a real number for M, use this for KO
                z_i = self.z_map_constr_ineq[:, row].nonzero()[0][0]
                sense = self.z_map_constr_ineq[z_i, row]
                if sense > 0:  # This means z_i = 1 knocks out ineq:
                    #     a_ineq*x - M*z <= b
                    self.A_ineq[row, z_i] = -Ms[row] + self.b_ineq[row]
                else:  # This means z_i = 0 knocks out ineq:
                    #     a_ineq*x + (M-b)*z <= M  (from a_ineq*x + M*z <= M + b)
                    self.A_ineq[row, z_i] = Ms[row] - self.b_ineq[row]
                    self.b_ineq[row] = Ms[row]
        self.z_map_constr_ineq = self.z_map_constr_ineq.tocsc()

        # 5. Translate back remaining inequalities to equations if applicable and link via indicator constraints
        knockable_constr_ineq = tuple(knockable_constr_ineq)
        knockable_constr_ineq_ic = [i for i in range(self.A_ineq.shape[0]) if np.isinf(Ms[i])]
        self.A_ineq = self.A_ineq.tocsr()

        # approach to find inequalities that can be lumped:
        # - construct a matrix from A_ineq, b_ineq, z_ineq for knockable constraints
        #   where every first entry of a row is positive
        # - search for row duplicates
        # - delete one if their first row entry had the same sign, lump to equality if they had opposite signs
        first_entry_A_ineq_sign = [np.sign(a.data[0]) if a.nnz > 0 else 0 for a in self.A_ineq]
        Ab_find_dupl = sparse.hstack((sparse.diags(first_entry_A_ineq_sign) * self.A_ineq, \
                                      sparse.coo_matrix(
                                          sparse.diags(first_entry_A_ineq_sign) * self.b_ineq).transpose(), \
                                      self.z_map_constr_ineq.transpose())) \
            .tocsr()
        # find rows that are identical
        ident_rows = []  # stores duplicate rows as Tuple (i,j,k): first row, second row, positive or negative duplicate
        for i, a1 in enumerate(Ab_find_dupl[range(Ab_find_dupl.shape[0] - 1)]):
            if i in knockable_constr_ineq_ic:  # only compare knockable ineqs
                for j, a2 in enumerate(Ab_find_dupl):
                    if j in knockable_constr_ineq_ic and j > i:  # only compare knockable ineqs
                        if a1.nnz == a2.nnz:
                            if all(a1.indices == a2.indices) & all(a1.data == a2.data):
                                ident_rows += [(i, j, first_entry_A_ineq_sign[i] * first_entry_A_ineq_sign[j])]
        # replace two ineqs by one eq
        self.z_map_constr_ineq = self.z_map_constr_ineq.tocsc()
        A_eq = sparse.csr_matrix((0, self.A_ineq.shape[1]))
        z_eq = sparse.csc_matrix((self.num_z, 0))
        b_eq = []
        for j in [i for i in range(len(ident_rows)) if ident_rows[i][2] == -1]:
            A_eq = sparse.vstack((A_eq, self.A_ineq[ident_rows[j][0]]))
            b_eq += [self.b_ineq[ident_rows[j][0]]]
            z_eq = sparse.hstack((z_eq, self.z_map_constr_ineq[:, ident_rows[j][0]]))
        # Add to global equality problem part
        knockable_constr_eq_ic = [i + len(self.b_eq) for i in range(len(b_eq))]
        self.A_eq = sparse.vstack((self.A_eq, A_eq), 'csr')
        self.b_eq += b_eq
        self.z_map_constr_eq = sparse.hstack((self.z_map_constr_eq, z_eq)).tocsc()

        # Remove all duplicates from ineq
        if ident_rows == []:
            remove_ineq = np.array([], 'int')
        else:
            remove_ineq = np.unique(np.hstack([[ir[0], ir[1]] if ir[2] == -1 else [ir[1]] for ir in ident_rows]))
        keep_ineq = [i for i in range(self.A_ineq.shape[0]) if i not in remove_ineq]
        knockable_constr_ineq_ic = [True if i in knockable_constr_ineq_ic else False for i in range(self.A_ineq.shape[0])]
        knockable_constr_ineq_ic = np.nonzero([knockable_constr_ineq_ic[i] for i in keep_ineq])[0]
        self.A_ineq = self.A_ineq[keep_ineq, :]
        self.b_ineq = [self.b_ineq[i] for i in keep_ineq]
        self.z_map_constr_ineq = self.z_map_constr_ineq[:, keep_ineq]

        # b5. Link remaining (in)equalities to z via indicator constraints
        #    and remove them from the static problem
        #    - first ineqs then eqs
        ic_binv = np.append(self.z_map_constr_ineq[:, knockable_constr_ineq_ic].indices,
                            self.z_map_constr_eq[:, knockable_constr_eq_ic].indices)
        ic_A = sparse.vstack((self.A_ineq[knockable_constr_ineq_ic, :], self.A_eq[knockable_constr_eq_ic, :]))
        ic_b = [self.b_ineq[i] for i in knockable_constr_ineq_ic] + [self.b_eq[i] for i in knockable_constr_eq_ic]
        ic_sense = 'L' * len(knockable_constr_ineq_ic) + 'E' * len(knockable_constr_eq_ic)
        ic_indicval = np.append(self.z_map_constr_ineq[:, knockable_constr_ineq_ic].data, self.z_map_constr_eq[:,
                                                                                                               knockable_constr_eq_ic].data)
        ic_indicval = [0 if i == 1 else 1 for i in ic_indicval]
        # in z-maps: -1 => z=1 -> A_ineq*x <= b_ineq
        #             1 => z=0 -> A_ineq*x <= b_ineq
        self.indic_constr = IndicatorConstraints(ic_binv, ic_A, ic_b, ic_sense, ic_indicval)

        # b6. Remove knockable (in)equalities from static problem, as they are now indicator constraints
        keep_ineq = [False if i in knockable_constr_ineq_ic else True for i in range(self.A_ineq.shape[0])]
        self.A_ineq = self.A_ineq[keep_ineq, :]
        self.b_ineq = [self.b_ineq[i] for i in range(len(keep_ineq)) if keep_ineq[i]]
        keep_eq = [False if i in knockable_constr_eq_ic else True for i in range(self.A_eq.shape[0])]
        self.A_eq = self.A_eq[keep_eq, :]
        self.b_eq = [self.b_eq[i] for i in range(len(keep_eq)) if keep_eq[i]]


[docs]class ContMILP:
    """Continuous representation of the strain design MILP.
    
    This MILP can be used to verify computation results. Since this class also stores the relationship
    between intervention variables z and corresponding (in)equality constraints and variables in the 
    problem, it can be used to verify computed designs quickly and in a numerically stable manner."""

    def __init__(self, A_ineq, b_ineq, A_eq, b_eq, lb, ub, c, z_map_constr_ineq, z_map_constr_eq, z_map_vars):
        self.A_ineq = A_ineq
        self.b_ineq = b_ineq
        self.A_eq = A_eq
        self.b_eq = b_eq
        self.lb = lb
        self.ub = ub
        self.c = c
        self.z_map_constr_ineq = z_map_constr_ineq
        self.z_map_constr_eq = z_map_constr_eq
        self.z_map_vars = z_map_vars

[docs]def build_primal_from_cbm(model, V_ineq=None, v_ineq=None, V_eq=None, v_eq=None, c=None) -> \
        Tuple[sparse.csr_matrix, Tuple, sparse.csr_matrix, Tuple, Tuple, Tuple, sparse.csr_matrix, sparse.csr_matrix, sparse.csr_matrix]:
    """Builds primal LP from constraint-based model and (optionally) additional constraints.
    
    standard form: A_ineq x <= b_ineq, A_eq x = b_eq, lb <= x <= ub, min{c'x}. Additionally,
    this function also returns a set of matrices that associate each variable (and constraint)
    with reactions. In the primal problems all variables correspond to reactions (z), therefore,
    the z_map_vars matrix is an identity matrix. The constraints correspond to metabolites, thus
    z_map_constr_ineq, z_map_constr_eq are all-zero.
    
    Args:
        model (cobra.Model):
            A metabolic model that is an instance of the cobra.Model class
            
        V_ineq, v_ineq (sparse.csr_matrix, list of float):
            Linear inequality constraints of the form V_ineq*x <= v_ineq. Ensure that the number
            of columns in V_ineq is identical to the number of reactions in the model.
            
        V_eq, v_eq (sparse.csr_matrix, list of float):
            Linear equality constraints of the form V_eq*x = v_eq. Ensure that the number
            of columns in V_eq is identical to the number of reactions in the model.
            
        c (list of float):
            Object coefficient vector (same lenght as variable vector).
    
    Returns:
        (Tuple):
        A_ineq, b_ineq, A_eq, b_eq, lb, ub, c, z_map_constr_ineq, z_map_constr_eq, z_map_vars.
        A constraint-based steady-state model in the form of a linear (in)equality system. The matrices
        z_map_constr_ineq, z_map_constr_eq, z_map_vars contain the association between reactions and
        different parts of the LP, such as reactions, metabolites or other (in)equalities.
    """
    numr = len(model.reactions)
    # initialize matices (if not provided in function call)
    if V_ineq is None or v_ineq is None:
        V_ineq = sparse.csr_matrix((0, numr))
        v_ineq = []
    if V_eq is None or v_eq is None:
        V_eq = sparse.csr_matrix((0, numr))
        v_eq = []
    if c is None:
        c = [i.objective_coefficient for i in model.reactions]
    S = sparse.csr_matrix(create_stoichiometric_matrix(model))
    # fill matrices
    A_eq = sparse.vstack((S, V_eq))
    b_eq = [0 for _ in range(S.shape[0])] + v_eq
    A_ineq = V_ineq.copy()
    b_ineq = v_ineq.copy()
    lb = [v.lower_bound for v in model.reactions]
    ub = [v.upper_bound for v in model.reactions]
    z_map_vars = sparse.identity(numr, 'd', format="csc")
    z_map_constr_eq = sparse.csc_matrix((numr, A_eq.shape[0]))
    z_map_constr_ineq = sparse.csc_matrix((numr, A_ineq.shape[0]))
    A_ineq, b_ineq, lb, ub, z_map_constr_ineq = prevent_boundary_knockouts(A_ineq, b_ineq, lb.copy(), ub.copy(), z_map_constr_ineq,
                                                                           z_map_vars)
    return A_ineq, b_ineq, A_eq, b_eq, lb, ub, c, z_map_constr_ineq, z_map_constr_eq, z_map_vars

[docs]def LP_dualize(A_ineq_p, b_ineq_p, A_eq_p, b_eq_p, lb_p, ub_p, c_p,
            z_map_constr_ineq_p=None, z_map_constr_eq_p=None, z_map_vars_p=None) -> \
        Tuple[sparse.csr_matrix, Tuple, sparse.csr_matrix, Tuple, Tuple, Tuple, sparse.csr_matrix, sparse.csr_matrix, sparse.csr_matrix]:
    """Translate a primal system to its LP dual system
    
    The primal system must be given in the standard form: A_ineq x <= b_ineq, A_eq x = b_eq, lb <= x < ub, min{c'x}. The LP duality
    theorem defines a set of two problems. If one of the LPs is a maximization and and optimum exists, the optimal value of this LP
    is identical to the minimal optimum of its LP dual problem. LP duality can be used for nested optimization, since solving the 
    primal and the LP dual problem, while enfocing equality of the objective value, guarantees optimality.
    
    Construction of the LP dual:    
        Variables translate to constraints:
            x={R} ->   =
            x>=0  ->  >= (new constraint is multiplied with -1 to translate to <= e.g. -A_i' y <= -c_i)
            x<=0  ->  <=
        Constraints translate to variables:
            =     ->   y={R}
            <=    ->   y>=0
            
    Args:
        A_ineq_p, b_ineq_p (sparse.csr_matrix and list of float): 
            A coefficient matrix and a vector that describe the linear inequalities of the primal LP A_ineq_p*x <= b_ineq_p
            
        A_eq_p, b_eq_p (sparse.csr_matrix and list of float): 
            A coefficient matrix and a vector that describe the linear equalities of the primal LP A_eq_p*x <= b_eq_p
            
        lb_p, ub_p (list of float):
            Upper and lower variable bounds in vector form.
            
        c_p (list of float):
            The objective coefficient vector of the primal minimization-LP.
            z_map_constr_ineq_p, z_map_constr_eq_p, z_map_vars_p
                    
        z_map_constr_ineq, z_map_constr_eq, z_map_vars (optional (sparse.csr_matrix)):
            Matrices that contain the relationship between metabolic reactions and different parts of the LP, such as reactions, 
            metabolites or other (in)equalities. These matrices help keeping track of the parts of the LP that are affected by
            reaction knockouts and additions. When a reaction (i) knockout removes the variable or constraint (j), the respective
            matrix contains a coefficient 1 at this position. -1 marks additions. E.g.: If the knockout of reaction i corresponds
            to the removal of inequality constraint j, there is a matrix entry z_map_constr_ineq_(i,j) = 1. If these matrices are
            provided, they are updated for the dualized LP, if not, the dual problem is constructed without returning information 
            about these relationships.
            
    Returns
        (Tuple):
        The LP dual of the problem in the format: A_ineq, b_ineq, A_eq, b_eq, c, lb, ub
        and optionally also z_map_constr_ineq, z_map_constr_eq, z_map_vars
    """
    numr = A_ineq_p.shape[1]
    numz = max([z.shape[0] for z in [z_map_vars_p, z_map_constr_eq_p, z_map_constr_ineq_p] if z is not None])

    if numz:
        if z_map_vars_p is None:
            z_map_vars_p = sparse.csc_matrix((numz, A_ineq_p.shape[1]))
        if z_map_constr_eq_p is None:
            z_map_constr_eq_p = sparse.csc_matrix((numz, A_eq_p.shape[0]))
        if z_map_constr_ineq_p is None:
            z_map_constr_ineq_p = sparse.csc_matrix((numz, A_ineq_p.shape[0]))

    # knockouts of variables and constraints must not overlap in the problem matrix
    if not len(
            A_eq_p[[True if i in z_map_constr_eq_p.nonzero()[1] else False for i in range(0, A_eq_p.shape[0])], :] \
                    [:,
            [True if i in z_map_vars_p.nonzero()[1] else False for i in range(0, A_eq_p.shape[1])]].nonzero()[
                0]) == 0 \
            or not len(
        A_ineq_p[[True if i in z_map_constr_ineq_p.nonzero()[1] else False for i in range(0, A_ineq_p.shape[0])], :] \
                [:,
        [True if i in z_map_vars_p.nonzero()[1] else False for i in range(0, A_ineq_p.shape[1])]].nonzero()[
            0]) == 0:
        raise Exception(
            "knockouts of variables and constraints must not overlap in the problem matrix. Something went wrong during the construction of the primal problem."
        )

    if c_p == []:
        c_p = [0.0] * numr

    # Translate inhomogenous bounds into inequality constraints
    lb_inh_bounds = [i for i in np.nonzero(lb_p)[0] if not np.isinf(lb_p[i])]
    ub_inh_bounds = [i for i in np.nonzero(ub_p)[0] if not np.isinf(ub_p[i])]
    x_geq0 = np.nonzero(np.greater_equal(lb_p, 0) & np.greater(ub_p, 0))[0]
    x_eR = np.nonzero(np.greater(0, lb_p) & np.greater(ub_p, 0))[0]
    x_leq0 = np.nonzero(np.greater(0, lb_p) & np.greater_equal(0, ub_p))[0]

    LB = sparse.csr_matrix((len(lb_inh_bounds) * [-1], (range(0, len(lb_inh_bounds)), lb_inh_bounds)), shape=(len(lb_inh_bounds), numr))
    UB = sparse.csr_matrix((len(ub_inh_bounds) * [1], (range(0, len(ub_inh_bounds)), ub_inh_bounds)), shape=(len(ub_inh_bounds), numr))
    A_ineq_p = sparse.vstack((A_ineq_p, LB, UB))
    b_ineq_p = b_ineq_p + [-lb_p[i] for i in lb_inh_bounds] + [ub_p[i] for i in ub_inh_bounds]

    # Translate into dual system
    # Transpose parts of matrices with variables >=0,<=0 to put them into A_ineq
    A_ineq = sparse.vstack((sparse.hstack((np.transpose(-A_eq_p[:, x_geq0]), np.transpose(-A_ineq_p[:, x_geq0]))), \
                            sparse.hstack(
                                (np.transpose(A_eq_p[:, x_leq0]), np.transpose(A_ineq_p[:, x_leq0]))))).tocsr()
    b_ineq = [c_p[i] for i in x_geq0] + [-c_p[i] for i in x_leq0]
    A_eq = sparse.hstack((np.transpose(A_eq_p[:, x_eR]), np.transpose(A_ineq_p[:, x_eR]))).tocsr()
    b_eq = [c_p[i] for i in x_eR]
    lb = [-np.inf] * A_eq_p.shape[0] + [0.0] * A_ineq_p.shape[0]
    ub = [np.inf] * (A_eq_p.shape[0] + A_ineq_p.shape[0])
    c = b_eq_p + b_ineq_p

    if numz:
        # translate mapping of z-variables to rows instead of columns
        z_map_constr_ineq = sparse.hstack((z_map_vars_p[:, x_geq0], z_map_vars_p[:, x_leq0])).tocsc()
        z_map_constr_eq = z_map_vars_p[:, x_eR]
        z_map_vars = sparse.hstack(
            (z_map_constr_eq_p, z_map_constr_ineq_p, sparse.csc_matrix((numz, len(lb_inh_bounds) + len(ub_inh_bounds))))).tocsc()
        A_ineq, b_ineq, A_eq, b_eq, lb, ub, z_map_constr_ineq, z_map_constr_eq = \
            reassign_lb_ub_from_ineq(A_ineq, b_ineq, A_eq, b_eq, lb, ub,  z_map_constr_ineq, z_map_constr_eq, z_map_vars)
        return A_ineq, b_ineq, A_eq, b_eq, lb, ub, c, z_map_constr_ineq, z_map_constr_eq, z_map_vars
    else:
        A_ineq, b_ineq, A_eq, b_eq, lb, ub = reassign_lb_ub_from_ineq(A_ineq, b_ineq, A_eq, b_eq, lb, ub)
        return A_ineq, b_ineq, A_eq, b_eq, lb, ub, c

[docs]def farkas_dualize(A_ineq_p, b_ineq_p, A_eq_p, b_eq_p, lb_p, ub_p,
                  z_map_constr_ineq_p=None, z_map_constr_eq_p=None, z_map_vars_p=None) -> \
        Tuple[sparse.csr_matrix, Tuple, sparse.csr_matrix, Tuple, Tuple, sparse.csr_matrix, sparse.csr_matrix, sparse.csr_matrix]:
    """Translate a primal system of linear (in)equality to its Farkas dual
    
    The primal system must be given in the standard form: A_ineq x <= b_ineq, A_eq x = b_eq, lb <= x < ub. Farkas' lemma
    defines a set of two systems of linear (in)equalities of which exactly one is feasible. Since the feasibility of one is a
    certificate for the infeasibility of the other one, this theorem can be used to set up problems that imply the infeasibility
    and thus exclusion of a certain subspace. This priciple is used for MCS calculation (the SUPPRESS module).
    
    Consider that the following is not implemented:
    In the case of (1) A x = b, (2) x={R}, (3) b~=0, Farkas' lemma is special,
    because b'y ~= 0 is required to make the primal infeasible instead of b'y < 0.
    1. This does not occur very often.
    2. Splitting the equality into two inequalities that translate to y>=0
    would be posible, and yield b'y < 0 in the farkas' lemma.
    Maybe splitting is required, but I actually don't think so. Using the
    special case of b'y < 0 for b'y ~= 0 should be enough.
    
    Args:
        A_ineq_p, b_ineq_p (sparse.csr_matrix and list of float): 
            A coefficient matrix and a vector that describe the linear inequalities of the primal LP A_ineq_p*x <= b_ineq_p
            
        A_eq_p, b_eq_p (sparse.csr_matrix and list of float): 
            A coefficient matrix and a vector that describe the linear equalities of the primal LP A_eq_p*x <= b_eq_p
            
        lb_p, ub_p (list of float):
            Upper and lower variable bounds in vector form.
                    
        z_map_constr_ineq, z_map_constr_eq, z_map_vars (optional (sparse.csr_matrix)):
            Matrices that contain the relationship between metabolic reactions and different parts of the LP, such as reactions, 
            metabolites or other (in)equalities. These matrices help keeping track of the parts of the LP that are affected by
            reaction knockouts and additions. When a reaction (i) knockout removes the variable or constraint (j), the respective
            matrix contains a coefficient 1 at this position. -1 marks additions. E.g.: If the knockout of reaction i corresponds
            to the removal of inequality constraint j, there is a matrix entry z_map_constr_ineq_(i,j) = 1. If these matrices are
            provided, they are updated for the dualized LP, if not, the dual problem is constructed without returning information 
            about these relationships.
            
    Returns
        (Tuple):
        The Farkas dual of the linear (in)equality system in the format: A_ineq, b_ineq, A_eq, b_eq, lb, ub
        and optionally also z_map_constr_ineq, z_map_constr_eq, z_map_vars
    """
    numz = max([0] + [z.shape[0] for z in [z_map_vars_p, z_map_constr_eq_p, z_map_constr_ineq_p] if z is not None])
    c_p = [0 for _ in range(A_ineq_p.shape[1])]

    if numz:
        A_ineq_d, b_ineq_d, A_eq_d, b_eq_d, lb_f, ub_f, c_d, z_map_constr_ineq_d, z_map_constr_eq_f, z_map_vars_f = LP_dualize(
            A_ineq_p, b_ineq_p, A_eq_p, b_eq_p, lb_p, ub_p, c_p, z_map_constr_ineq_p, z_map_constr_eq_p, z_map_vars_p)
    else:
        A_ineq_d, b_ineq_d, A_eq_d, b_eq_d, lb_f, ub_f, c_d = LP_dualize(A_ineq_p, b_ineq_p, A_eq_p, b_eq_p, lb_p, ub_p, c_p)
    # add constraint b_prim'y or (c_dual'*y) <= -1;
    A_ineq_f = sparse.vstack((A_ineq_d, c_d)).tocsr()
    b_ineq_f = b_ineq_d + [-1]
    A_eq_f = A_eq_d
    b_eq_f = b_eq_d
    # it would also be possible (but ofc not necessary) to force (c_dual*y) == -1; instead
    # A_eq = sparse.vstack((A_eq,c_dual)).tocsr()
    # b_eq += [-1]
    if numz:
        z_map_constr_ineq_f = sparse.hstack((z_map_constr_ineq_d, sparse.csr_matrix((numz, 1)))).tocsc()
        # z_map_constr_eq_f = sparse.hstack((z_map_constr_eq_d,sparse.csr_matrix((numz,1)))).tocsc()
        return A_ineq_f, b_ineq_f, A_eq_f, b_eq_f, lb_f, ub_f, z_map_constr_ineq_f, z_map_constr_eq_f, z_map_vars_f
    else:
        return A_ineq_f, b_ineq_f, A_eq_f, b_eq_f, lb_f, ub_f, z_map_constr_ineq_f, z_map_constr_eq_f, z_map_vars_f

[docs]def reassign_lb_ub_from_ineq(A_ineq, b_ineq, A_eq, b_eq, lb, ub,
                             z_map_constr_ineq=None, z_map_constr_eq=None, z_map_vars=None) -> \
        Tuple[sparse.csr_matrix, Tuple, sparse.csr_matrix, Tuple, Tuple, Tuple, sparse.csr_matrix, sparse.csr_matrix]:
    """Remove single constraints in A_ineq or A_eq in favor of lower and upper bounds on variables
    
    Constraints on single variables instead translated into lower and upper bounds (lb, ub). This
    is useful to filter out redundant bounds on variables and keep the (in)equality system concise. 
    To avoid interference with the knock-out logic, negative upper bounds and positive lower bounds 
    are not put into lb and ub, when reactions are flagged knockable with z_map_vars.
    
    Args:
        A_ineq_p, b_ineq_p (sparse.csr_matrix and list of float): 
            A coefficient matrix and a vector that describe the linear inequalities of the primal LP A_ineq_p*x <= b_ineq_p
            
        A_eq_p, b_eq_p (sparse.csr_matrix and list of float): 
            A coefficient matrix and a vector that describe the linear equalities of the primal LP A_eq_p*x <= b_eq_p
            
        lb_p, ub_p (list of float):
            Upper and lower variable bounds in vector form.
            
        z_map_constr_ineq, z_map_constr_eq, z_map_vars (optional (sparse.csr_matrix)):
            Matrices that contain the relationship between metabolic reactions and different parts of the LP, such as reactions, 
            metabolites or other (in)equalities. These matrices help keeping track of the parts of the LP that are affected by
            reaction knockouts and additions. When a reaction (i) knockout removes the variable or constraint (j), the respective
            matrix contains a coefficient 1 at this position. -1 marks additions. E.g.: If the knockout of reaction i corresponds
            to the removal of inequality constraint j, there is a matrix entry z_map_constr_ineq_(i,j) = 1. If these matrices are
            provided, they are updated. Otherwise, all reactions are assumed to be notknockable and thus all constraints on 
            single variables put into lb and ub.
    
    Returns
        (Tuple):
        A linear (in)equality system in the format: A_ineq, b_ineq, A_eq, b_eq, lb, ub
        and optionally also updated z_map_constr_ineq, z_map_constr_eq
    """
    lb = [[l] for l in lb]
    ub = [[u] for u in ub]
    numz = max([0] + [z.shape[0] for z in [z_map_vars, z_map_constr_eq, z_map_constr_ineq] if z is not None])
    numr = A_ineq.shape[1]

    if z_map_vars is None:
        z_map_vars = sparse.csc_matrix((numz, numr))

    # translate entries to lb or ub
    # find all entries in A_ineq
    row_ineq = A_ineq.nonzero()[0]
    # filter for rows with only one entry
    var_bound_constraint_ineq = [i for i in row_ineq if list(row_ineq).count(i) == 1]
    # exclude knockable constraints
    var_bound_constraint_ineq = [i for i in var_bound_constraint_ineq if i not in z_map_constr_ineq.nonzero()[1]]
    # retrieve all bounds from inequality constraints
    for i in var_bound_constraint_ineq:
        idx_r = A_ineq[i, :].nonzero()[1][0]  # get reaction from constraint (column of entry)
        if A_ineq[i, idx_r] > 0:  # upper bound constraint
            ub[idx_r] += [b_ineq[i] / A_ineq[i, idx_r]]
        else:  # lower bound constraint
            lb[idx_r] += [b_ineq[i] / A_ineq[i, idx_r]]

    # find all entries in A_eq
    row_eq = A_eq.nonzero()[0]
    # filter for rows with only one entry
    var_bound_constraint_eq = [i for i in row_eq if list(row_eq).count(i) == 1]
    # exclude knockable constraints
    var_bound_constraint_eq = [i for i in var_bound_constraint_eq if i not in z_map_constr_eq.nonzero()[1]]
    # retrieve all bounds from equality constraints
    # and partly set lb or ub derived from equality constraints, for instance:
    # If x =  5, set ub = 5 and keep the inequality constraint -x <= -5.
    # If x = -5, set lb =-5 and keep the inequality constraint  x <= -5.
    A_ineq_new = sparse.csr_matrix((0, numr))
    b_ineq_new = []
    for i in var_bound_constraint_eq:
        idx_r = A_eq[i, :].nonzero()[1][0]  # get reaction from constraint (column of entry)
        if any(z_map_vars[:, idx_r]):  # if reaction is knockable
            if A_eq[i, idx_r] * b_eq[i] > 0:  # upper bound constraint
                ub[idx_r] += [b_eq[i] / A_eq[i, idx_r]]
                A_ineq_new = sparse.vstack((A_ineq_new, -A_eq[i, :]))
                b_ineq_new += [-b_eq[i]]
            elif A_eq[i, idx_r] * b_eq[i] < 0:  # lower bound constraint
                lb[idx_r] += [b_eq[i] / A_eq[i, idx_r]]
                A_ineq_new = sparse.vstack((A_ineq_new, A_eq[i, :]))
                b_ineq_new += [b_eq[i]]
            else:
                ub[idx_r] += [0.0]
                lb[idx_r] += [0.0]
        else:
            lb[idx_r] += [b_eq[i] / A_eq[i, idx_r]]
            ub[idx_r] += [b_eq[i] / A_eq[i, idx_r]]
    # set tightest bounds (avoid inf)
    lb = [max([i for i in l if not np.isinf(i)] + [np.nan]) for l in lb]
    ub = [min([i for i in u if not np.isinf(i)] + [np.nan]) for u in ub]
    # set if only if no other bound remains
    lb = [-np.inf if np.isnan(l) else l for l in lb]
    ub = [np.inf if np.isnan(u) else u for u in ub]

    # check if bounds are consistent
    if any(np.greater(lb, ub)):
        raise Exception("There is a lower bound that is greater than its upper bound counterpart.")

    # remove constraints that became redundant
    numineq = A_ineq.shape[0]
    A_ineq = A_ineq[[False if i in var_bound_constraint_ineq else True for i in range(0, numineq)]]
    b_ineq = [b_ineq[i] for i in range(0, len(b_ineq)) if i not in var_bound_constraint_ineq]
    z_map_constr_ineq = z_map_constr_ineq[:, [False if i in var_bound_constraint_ineq else True for i in range(0, numineq)]]
    numeq = A_eq.shape[0]
    A_eq = A_eq[[False if i in var_bound_constraint_eq else True for i in range(0, numeq)]]
    b_eq = [b_eq[i] for i in range(0, len(b_eq)) if i not in var_bound_constraint_eq]
    # add equality constraints that transformed to inequality constraints
    A_ineq = sparse.vstack((A_ineq, A_ineq_new))
    b_ineq += b_ineq_new
    if numz:
        z_map_constr_eq = z_map_constr_eq[:, [False if i in var_bound_constraint_eq else True for i in range(0, numeq)]]
        z_map_constr_ineq = sparse.hstack((z_map_constr_ineq, sparse.csc_matrix((numz, A_ineq_new.shape[0]))))
        return A_ineq, b_ineq, A_eq, b_eq, lb, ub, z_map_constr_ineq, z_map_constr_eq
    else:
        return A_ineq, b_ineq, A_eq, b_eq, lb, ub

[docs]def prevent_boundary_knockouts(A_ineq, b_ineq, lb, ub, z_map_constr_ineq, z_map_vars) -> \
        Tuple[sparse.csr_matrix, Tuple, Tuple, Tuple, sparse.csr_matrix]:
    """Put negative lower bounds and positive upper bounds into (notknockable) inequalities
    
    This is a helper function that puts negative lower bounds and positive upper bounds into (not-knockable) 
    inequalities. Later on, one may simulate the knockouts by multiplying the upper and lower bounds with a
    binary variable z. This functions prevents that 
    
    Args:
        A_ineq_p, b_ineq_p (sparse.csr_matrix and list of float): 
            A coefficient matrix and a vector that describe the linear inequalities of the primal LP A_ineq_p*x <= b_ineq_p
            
        lb_p, ub_p (list of float):
            Upper and lower variable bounds in vector form.
            
        z_map_constr_ineq, z_map_vars (optional (sparse.csr_matrix)):
            Matrices that contain the relationship between metabolic reactions and different parts of the LP, such as reactions, 
            metabolites or other (in)equalities. These matrices help keeping track of the parts of the LP that are affected by
            reaction knockouts and additions. When a reaction (i) knockout removes the variable or constraint (j), the respective
            matrix contains a coefficient 1 at this position. -1 marks additions. E.g.: If the knockout of reaction i corresponds
            to the removal of inequality constraint j, there is a matrix entry z_map_constr_ineq_(i,j) = 1. If these matrices are
            provided, they are updated. Otherwise, all reactions are assumed to be knockable and thus all negative upper and 
            positive lower bounds are translated into constraints.           
    
    Returns
        (Tuple):
        A linear (in)equality system in the format: A_ineq, b_ineq, A_eq, b_eq, lb, ub
        and optionally also updated z_map_constr_ineq, z_map_constr_eq
    """
    numr = A_ineq.shape[1]
    numz = max([0] + [z.shape[0] for z in [z_map_vars, z_map_constr_ineq] if z is not None])

    if z_map_vars is None:
        z_map_vars = sparse.csc_matrix((numz, numr))

    for i in range(0, numr):
        if any(z_map_vars[:, i]) and lb[i] > 0:
            A_ineq = sparse.vstack((A_ineq, sparse.csr_matrix(([-1], ([0], [i])), shape=(1, numr))))
            b_ineq += [-lb[i]]
            z_map_constr_ineq = sparse.hstack((z_map_constr_ineq, sparse.csc_matrix((numz, 1))))
            lb[i] = 0.0
        if any(z_map_vars[:, i]) and ub[i] < 0:
            A_ineq = sparse.vstack((A_ineq, sparse.csr_matrix(([1], ([0], [i])), shape=(1, numr))))
            b_ineq += [ub[i]]
            z_map_constr_ineq = sparse.hstack((z_map_constr_ineq, sparse.csc_matrix((numz, 1))))
            ub[i] = 0.0

    return A_ineq, b_ineq, lb, ub, z_map_constr_ineq


[docs]def worker_init(A, A_ineq, b_ineq, A_eq, b_eq, lb, ub, solver):
    """Helper function for determining bounds on linear expressions"""
    global lp_glob
    lp_glob = MILP_LP(A_ineq=A_ineq, b_ineq=b_ineq, A_eq=A_eq, b_eq=b_eq, lb=lb, ub=ub, solver=solver)
    if lp_glob == CPLEX:
        lp_glob.backend.parameters.lpmethod.set(1)
        if Configuration().processes > 1:
            lp_glob.backend.parameters.threads.set(2)
    lp_glob.solver = solver
    lp_glob.A = A


[docs]def worker_compute(i) -> Tuple[int, float]:
    """Helper function for determining bounds on linear expressions"""
    global lp_glob
    # maximize by minimizing negative objective and negating result
    lp_glob.set_objective(-lp_glob.A[i])
    min_cx = -lp_glob.slim_solve()
    return i, min_cx