Merge pull request #65 from simon-hirsch:dist_add_log

BerriJ · web-flow · commit 591ed887dd6c · 2025-04-24T10:48:49.000+02:00
Add log pdf/cdf/pmf
diff --git a/src/rolch/base/distribution.py b/src/rolch/base/distribution.py
@@ -114,6 +114,21 @@ def initial_values(
     ) -> np.ndarray:
         """Calculate the initial values for the GAMLSS fit."""
 
+    def quantile(self, q: np.ndarray, theta: np.ndarray) -> np.ndarray:
+        """
+        Compute the quantile function for the given probabilities.
+
+        This is an alias for the `ppf` method.
+
+        Parameters:
+            q (np.ndarray): The probabilities at which to compute the quantiles.
+            theta (np.ndarray): The parameters of the distribution.
+
+        Returns:
+            np.ndarray: The quantiles corresponding to the given probabilities.
+        """
+        return self.ppf(q, theta)
+
     @abstractmethod
     def cdf(self, y: np.ndarray, theta: np.ndarray) -> np.ndarray:
         """
@@ -179,6 +194,30 @@ def rvs(self, size: int, theta: np.ndarray) -> np.ndarray:
             np.ndarray: A 2D array of random variates with shape (theta.shape[0], size).
         """
 
+    @abstractmethod
+    def logpmf(self, y: np.ndarray, theta: np.ndarray) -> np.ndarray:
+        raise NotImplementedError(
+            "Log PMF is not implemented for continuous distributions."
+        )
+
+    @abstractmethod
+    def logpdf(self, y: np.ndarray, theta: np.ndarray) -> np.ndarray:
+        raise NotImplementedError(
+            "Log PDF is not implemented for discrete distributions."
+        )
+
+    @abstractmethod
+    def logcdf(self, y: np.ndarray, theta: np.ndarray) -> np.ndarray:
+        """Compute the log of the cumulative distribution function (CDF) for the given data points.
+
+        Parameters:
+            y (np.ndarray): An array of data points at which to evaluate the log CDF.
+            theta (np.ndarray): An array of parameters for the distribution.
+
+        Returns:
+            np.ndarray: An array of log CDF values corresponding to the data points in `y`.
+        """
+
 
 class ScipyMixin(ABC):
 
@@ -236,6 +275,42 @@ def pmf(self, y: np.ndarray, theta: np.ndarray) -> np.ndarray:
     def ppf(self, q: np.ndarray, theta: np.ndarray) -> np.ndarray:
         return self.scipy_dist(**self.theta_to_scipy_params(theta)).ppf(q)
 
+    def logpmf(self, y: np.ndarray, theta: np.ndarray) -> np.ndarray:
+        """Compute the log of the probability mass function (PMF) for the given data points.
+
+        Parameters:
+            y (np.ndarray): An array of data points at which to evaluate the log PMF.
+            theta (np.ndarray): An array of parameters for the distribution.
+
+        Returns:
+            np.ndarray: An array of log PMF values corresponding to the data points in `y`.
+        """
+        return self.scipy_dist(**self.theta_to_scipy_params(theta)).logpmf(y)
+
+    def logpdf(self, y: np.ndarray, theta: np.ndarray) -> np.ndarray:
+        """Compute the log of the probability density function (PDF) for the given data points.
+
+        Parameters:
+            y (np.ndarray): An array of data points at which to evaluate the log PDF.
+            theta (np.ndarray): An array of parameters for the distribution.
+
+        Returns:
+            np.ndarray: An array of log PDF values corresponding to the data points in `y`.
+        """
+        return self.scipy_dist(**self.theta_to_scipy_params(theta)).logpdf(y)
+
+    def logcdf(self, y: np.ndarray, theta: np.ndarray) -> np.ndarray:
+        """Compute the log of the cumulative distribution function (CDF) for the given data points.
+
+        Parameters:
+            y (np.ndarray): An array of data points at which to evaluate the log CDF.
+            theta (np.ndarray): An array of parameters for the distribution.
+
+        Returns:
+            np.ndarray: An array of log CDF values corresponding to the data points in `y`.
+        """
+        return self.scipy_dist(**self.theta_to_scipy_params(theta)).logcdf(y)
+
     def rvs(self, size: int, theta: np.ndarray) -> np.ndarray:
         return (
             self.scipy_dist(**self.theta_to_scipy_params(theta))
diff --git a/src/rolch/estimators/online_gamlss.py b/src/rolch/estimators/online_gamlss.py
@@ -646,7 +646,7 @@ def update(
 
     def _outer_update(self, X, y, w):
         ## for new observations:
-        global_di = -2 * np.log(self.distribution.pdf(y, self.fv))
+        global_di = -2 * self.distribution.logpdf(y, self.fv)
         global_dev = (1 - self.forget[0]) * self.global_dev + global_di
         global_dev_old = global_dev + 1000
         iteration_outer = 0
@@ -696,7 +696,7 @@ def _outer_update(self, X, y, w):
 
     def _outer_fit(self, X, y, w):
 
-        global_di = -2 * np.log(self.distribution.pdf(y, self.fv))
+        global_di = -2 * self.distribution.logpdf(y, self.fv)
         global_dev = np.sum(w * global_di)
         global_dev_old = global_dev + 1000
         iteration_outer = 0
@@ -761,7 +761,7 @@ def _inner_fit(
         dv,
     ):
 
-        di = -2 * np.log(self.distribution.pdf(y, self.fv))
+        di = -2 * self.distribution.logpdf(y, self.fv)
         dv = np.sum(di * w)
         olddv = dv + 1
 
@@ -837,7 +837,7 @@ def _inner_fit(
             eta = X[param] @ self.beta[param].T
             self.fv[:, param] = self.distribution.link_inverse(eta, param=param)
 
-            di = -2 * np.log(self.distribution.pdf(y, self.fv))
+            di = -2 * self.distribution.logpdf(y, self.fv)
             olddv = dv
             dv = np.sum(di * w)
 
@@ -866,7 +866,7 @@ def _inner_update(
         dv,
         param,
     ):
-        di = -2 * np.log(self.distribution.pdf(y, self.fv))
+        di = -2 * self.distribution.logpdf(y, self.fv)
         dv = (1 - self.forget[0]) * self.global_dev + np.sum(di * w)
         olddv = dv + 1
 
@@ -949,7 +949,7 @@ def _inner_update(
 
             olddv = dv
 
-            di = -2 * np.log(self.distribution.pdf(y, self.fv))
+            di = -2 * self.distribution.logpdf(y, self.fv)
             dv = np.sum(di * w) + (1 - self.forget[0]) * self.global_dev
 
             message = f"Outer iteration {iteration_outer}: Fitting Parameter {param}: Inner iteration {iteration_inner}: Current LL {dv}"