import numpy as np
from scipy import sparse
def _is_log_transformed_array(
    X,
    neg_frac_thresh=5e-3,
    p95_thresh=100.0,
):
    """Heuristic log-transform check on a dense numeric array.

    The data are flagged as log-transformed when either a non-trivial
    share of the finite values is negative, or the 95th percentile of
    the finite values is small.

    Parameters
    ----------
    X : array_like
        Dense numeric matrix (an ``np.ndarray`` or anything
        ``np.asanyarray`` can convert, e.g. nested lists).
        Non-finite values (NaN, +/-inf) are ignored.
    neg_frac_thresh : float
        If the fraction of negative values among the finite entries
        is at or above this value, the data are considered
        log-transformed.
    p95_thresh : float
        If the 95th percentile of the finite entries is at or below
        this value, the data are considered log-transformed.

    Returns
    -------
    is_log : bool
        ``True`` if the matrix looks log-transformed.
    stats : dict
        Diagnostic statistics with keys ``frac_negative``, ``p95``,
        ``p5``, ``dynamic_range_ratio`` and ``n_finite``. Only
        ``frac_negative`` and ``p95`` drive the decision; the other
        entries are reported for inspection.

    Raises
    ------
    ValueError
        If ``X`` contains no finite values (including empty input).
    """
    # Coerce array-likes so boolean-mask indexing below works; ndarray
    # subclasses pass through unchanged.
    X = np.asanyarray(X)
    vals = X[np.isfinite(X)]
    if vals.size == 0:
        raise ValueError("No finite values found.")

    frac_negative = float(np.mean(vals < 0))

    # Non-finite entries are already filtered out, so the NaN-aware
    # percentile is unnecessary; ask for both quantiles in one call.
    p5, p95 = (float(q) for q in np.percentile(vals, [5, 95]))

    # Spread between the 5th and 95th percentiles relative to |p5|;
    # the floor on the denominator guards against division by zero.
    dr_ratio = float((p95 - p5) / max(abs(p5), 1e-12))

    is_log = frac_negative >= neg_frac_thresh or p95 <= p95_thresh

    stats = dict(
        frac_negative=frac_negative,
        p95=p95,
        p5=p5,
        dynamic_range_ratio=dr_ratio,
        n_finite=int(vals.size),
    )
    return bool(is_log), stats