Copula 入门

import numpy as np
import matplotlib.pyplot as plt
import tensorflow.compat.v2 as tf
tf.enable_v2_behavior()

import tensorflow_probability as tfp
tfd = tfp.distributions
tfb = tfp.bijectors


[Copula](https://en.wikipedia.org/wiki/Copula_(probability_theory%29) 是一种捕获随机变量之间依赖关系的经典方式。更正式地说，Copula 是一种多元分布 $$C(U_1, U_2, ...., U_n)$$，通过边缘化得到 $$U_i \sim \text{Uniform}(0, 1)$$。

Copula 很有趣，因为我们可以用它来创建任意边缘的多元分布。配方如下：

• 使用概率积分变换将任意连续的 R.V. $$X$$ 变为一个均匀的 $$F_X(X)$$，其中 $$F_X$$ 是 $$X$$ 的 CDF。
• 给定一个 Copula（比如，二元变量）$$C(U, V)$$，我们会得到 $$U$$ 和 $$V$$ 具有均匀的边缘分布。
• 现在给定我们感兴趣的 R.V's $$X, Y$$，创建一个新的分布 $$C'(X, Y) = C(F_X(X), F_Y(Y))$$。$$X$$ 和 $$Y$$ 的边缘是我们想要的。

高斯 Copula

class GaussianCopulaTriL(tfd.TransformedDistribution):
"""Takes a location, and lower triangular matrix for the Cholesky factor."""
def __init__(self, loc, scale_tril):
super(GaussianCopulaTriL, self).__init__(
distribution=tfd.MultivariateNormalTriL(
loc=loc,
scale_tril=scale_tril),
bijector=tfb.NormalCDF(),
validate_args=False,
name="GaussianCopulaTriLUniform")

# Plot an example of this.
unit_interval = np.linspace(0.01, 0.99, num=200, dtype=np.float32)
x_grid, y_grid = np.meshgrid(unit_interval, unit_interval)
coordinates = np.concatenate(
[x_grid[..., np.newaxis],
y_grid[..., np.newaxis]], axis=-1)

pdf = GaussianCopulaTriL(
loc=[0., 0.],
scale_tril=[[1., 0.8], [0., 0.6]],
).prob(coordinates)

# Plot its density.

plt.contour(x_grid, y_grid, pdf, 100, cmap=plt.cm.jet);


\begin{align*} X &amp;\sim \text{Kumaraswamy}(a, b) \ Y &amp;\sim \text{Gumbel}(\mu, \beta) \end{align*}

a = 2.0
b = 2.0
gloc = 0.
gscale = 1.

x = tfd.Kumaraswamy(a, b)
y = tfd.Gumbel(loc=gloc, scale=gscale)

# Plot the distributions, assuming independence
x_axis_interval = np.linspace(0.01, 0.99, num=200, dtype=np.float32)
y_axis_interval = np.linspace(-2., 3., num=200, dtype=np.float32)
x_grid, y_grid = np.meshgrid(x_axis_interval, y_axis_interval)

pdf = x.prob(x_grid) * y.prob(y_grid)

# Plot its density

plt.contour(x_grid, y_grid, pdf, 100, cmap=plt.cm.jet);


具有不同边缘的联合分布

class WarpedGaussianCopula(tfd.TransformedDistribution):
"""Application of a Gaussian Copula on a list of target marginals.

This implements an application of a Gaussian Copula. Given [x_0, ... x_n]
which are distributed marginally (with CDF) [F_0, ... F_n],
GaussianCopula represents an application of the Copula, such that the
resulting multivariate distribution has the above specified marginals.

The marginals are specified by marginal_bijectors: These are
bijectors whose inverse encodes the CDF and forward the inverse CDF.

block_sizes is a 1-D Tensor to determine splits for marginal_bijectors
length should be same as length of marginal_bijectors.
See tfb.Blockwise for details
"""
def __init__(self, loc, scale_tril, marginal_bijectors, block_sizes=None):
super(WarpedGaussianCopula, self).__init__(
distribution=GaussianCopulaTriL(loc=loc, scale_tril=scale_tril),
bijector=tfb.Blockwise(bijectors=marginal_bijectors,
block_sizes=block_sizes),
validate_args=False,
name="GaussianCopula")


# Create our coordinates:
coordinates = np.concatenate(
[x_grid[..., np.newaxis], y_grid[..., np.newaxis]], -1)

def create_gaussian_copula(correlation):
# Use Gaussian Copula to add dependence.
return WarpedGaussianCopula(
loc=[0.,  0.],
scale_tril=[[1., 0.], [correlation, tf.sqrt(1. - correlation ** 2)]],
# These encode the marginals we want. In this case we want X_0 has
# Kumaraswamy marginal, and X_1 has Gumbel marginal.

marginal_bijectors=[
tfb.Invert(tfb.KumaraswamyCDF(a, b)),
tfb.Invert(tfb.GumbelCDF(loc=0., scale=1.))])

# Note that the zero case will correspond to independent marginals!
correlations = [0., -0.8, 0.8]
copulas = []
probs = []
for correlation in correlations:
copula = create_gaussian_copula(correlation)
copulas.append(copula)
probs.append(copula.prob(coordinates))

# Plot it's density

for correlation, copula_prob in zip(correlations, probs):
plt.figure()
plt.contour(x_grid, y_grid, copula_prob, 100, cmap=plt.cm.jet)
plt.title('Correlation {}'.format(correlation))


def kumaraswamy_pdf(x):
return tfd.Kumaraswamy(a, b).prob(np.float32(x))

def gumbel_pdf(x):
return tfd.Gumbel(gloc, gscale).prob(np.float32(x))

copula_samples = []
for copula in copulas:
copula_samples.append(copula.sample(10000))

plot_rows = len(correlations)
plot_cols = 2  # for 2  densities [kumarswamy, gumbel]
fig, axes = plt.subplots(plot_rows, plot_cols, sharex='col', figsize=(18,12))

# Let's marginalize out on each, and plot the samples.

for i, (correlation, copula_sample) in enumerate(zip(correlations, copula_samples)):
k = copula_sample[..., 0].numpy()
g = copula_sample[..., 1].numpy()

_, bins, _ = axes[i, 0].hist(k, bins=100, density=True)
axes[i, 0].plot(bins, kumaraswamy_pdf(bins), 'r--')
axes[i, 0].set_title('Kumaraswamy from Copula with correlation {}'.format(correlation))

_, bins, _ = axes[i, 1].hist(g, bins=100, density=True)
axes[i, 1].plot(bins, gumbel_pdf(bins), 'r--')
axes[i, 1].set_title('Gumbel from Copula with correlation {}'.format(correlation))


结论

[{ "type": "thumb-down", "id": "missingTheInformationINeed", "label":"没有我需要的信息" },{ "type": "thumb-down", "id": "tooComplicatedTooManySteps", "label":"太复杂/步骤太多" },{ "type": "thumb-down", "id": "outOfDate", "label":"内容需要更新" },{ "type": "thumb-down", "id": "translationIssue", "label":"翻译问题" },{ "type": "thumb-down", "id": "samplesCodeIssue", "label":"示例/代码问题" },{ "type": "thumb-down", "id": "otherDown", "label":"其他" }]
[{ "type": "thumb-up", "id": "easyToUnderstand", "label":"易于理解" },{ "type": "thumb-up", "id": "solvedMyProblem", "label":"解决了我的问题" },{ "type": "thumb-up", "id": "otherUp", "label":"其他" }]