Source code for humancompatible.detect.methods.l_inf.lp_tools

import numpy as np
from random import randrange
import scipy.optimize as optimize



[docs]
def lin_prog_feas(
    hist1: np.ndarray,
    hist2: np.ndarray,
    delta: float,
    num_samples: float = 1.0,
) -> int:
    """Specifies a number of samples as a fraction of the total
    histogram bins and checks whether all the sampled bins satisfy
    
    |hist1 - hist2| <= delta.

    Args:
        hist1 (np.ndarray): 1-D array of histogram bin densities for the full dataset.
        hist2 (np.ndarray): 1-D array of histogram bin densities for the subgroup.
        delta (float): Threshold for the absolute difference |hist1 - hist2|.
        num_samples (float): Fraction of total bins to sample.
            The function draws int(num_samples * (len(hist1) - 1)) random samples.

    Returns:
        int: Status code from `scipy.optimize.linprog`. A status of 0 indicates
             the constraints are feasible (i.e., |hist1 - hist2| <= delta for all
             sampled bins); other codes signal infeasibility or solver errors.
    """
    rand_lst1 = []
    rand_lst2 = []

    for _ in range(0, int(num_samples * (hist1.shape[0] - 1))):
        i = randrange(0, hist1.shape[0] - 1)
        rand_lst1.append(float(hist1[i]))
        rand_lst2.append(float(hist2[i]))

    rand_arr1 = np.expand_dims(np.array(rand_lst1), axis=1)
    rand_arr2 = np.expand_dims(np.array(rand_lst2), axis=1)

    # We are not interested in the optimization itself, but in the
    # feasibility of the problem, therefore the coefficient in the
    # objective function is set to 0 and the only variable (x_0) is
    # fixed at 1
    c = 0
    x0_bounds = (1, 1)

    # Accomodate for the + & - signs of the absolute value in
    # |r_a1 - r_a2| <= delta
    A_ub = np.vstack((rand_arr1, -rand_arr1))
    b_ub = np.vstack((delta + rand_arr2, delta - rand_arr2))

    res = optimize.linprog(c, A_ub, b_ub, bounds=[x0_bounds])
    return res.status