##########Code by Alicja Wolny-Dominiak - woali@ue.katowice.pl library(VineCopula) library(CopulaRegression) library(MixedPoisson) library(scatterplot3d) library(insuranceData) data(dataOhlsson) #######________________ Example 1 data <- dataOhlsson E <- data$duration[data$skadkost>0&data$duration>0] N.nz <- data$antskad[data$skadkost>0&data$duration>0] Y.nz <- data$skadkost[data$skadkost>0&data$duration>0] data.nz <- as.data.frame(cbind(Y.nz,N.nz,E)) R <- S <- model.matrix(~1,data.nz) model.Y <- glm(Y.nz ~ -1+R, family = Gamma(link = "log")) beta.Y <- model.Y$coefficients phi <- summary(model.Y)$dispersion mu <- as.vector(exp(R %*% beta.Y)) sd.beta.Y <- sqrt(diag(vcov(model.Y))) E.Y.nz <- mu Var.Y.nz <- mu^2*phi model.N <- ztp.glm(N.nz, S, exposure=E) beta.N <- model.N$coefficients sd.beta.N <- model.N$sd lambda <- as.vector(exp(S %*% beta.N))*E E.N.nz <- (lambda*exp(lambda))/(exp(lambda)-1) Var.N.nz <- E.N.nz*(1-(lambda)/(exp(lambda)-1)) E.S <- E.Y.nz*E.N.nz Var.S <- E.Y.nz^2*Var.N.nz+E.N.nz*Var.Y.nz split.screen(c(1,2)) screen(1,new=TRUE) plot(quantile(E.S, probs = seq(0,0.95,0.005),type = 1),type="b", xlab="", ylab="probs = seq(0,0.95,0.005)",col=1, main="Quantiles of E[S] ") screen(2,new=TRUE) plot(quantile(E.S, probs = seq(0.95,1,0.005),type = 1),type="b", xlab="", ylab="probs = seq(0.95,1,0.005)",col=1, main="Quantiles of E[S]") #######________________ Example 2 density.Y.N <- function(y,k,u1,u2,u3,mu, phi,lambda, family) { theta.start=BiCopEst(rank(y - mu)/(length(y)+1), rank(k - lambda)/(length(k)+1), family)$par ## from the function copreg{CopulaRegression} tau.start=BiCopPar2Tau(par = theta.start, family = family) log.l <- function(parameters) { theta0 <- z2theta(parameters, family) out <- (-sum(log(BiCopHfunc(u1,u2,family, theta0)$hfunc1 - BiCopHfunc(u1,u3,family, theta0)$hfunc1))) return(out) } parameters.start <- theta2z(theta.start, family) parameters.ifm <- optim(parameters.start, log.l, method = "BFGS")$par theta.ifm <- z2theta(parameters.ifm, family) theta.ifm f.Y.N <- dgam(y,300,1.5)*(BiCopHfunc(u1,u2,family,theta.ifm)$hfunc1 - BiCopHfunc(u1,u3,family,theta.ifm)$hfunc1) outlist <- list(f.Y.N=f.Y.N, theta.ifm=theta.ifm, theta.start=theta.start) return(outlist) } k <-rpois(10000,1) k <-k[k>0] y <-rgam(length(k),300,1.5) u1 <-pgam(y,300,1.5) u2 <-ppois(k,1) u3 <-ppois(k-1,1) u3[u3<0] <-0 density.Y.N1 <- density.Y.N(y,k,u1,u2,u3,300,1.5,1,1)$f.Y.N density.Y.N5 <- density.Y.N(y,k,u1,u2,u3,300,1.5,1,5)$f.Y.N ###### Figure 3 split.screen(c(2,2)) screen(1,new=TRUE) scatterplot3d(y[1000] X.N=model.matrix(~1,data.frame(y)) ll.ztnb = function(param) { ll=sum(lgamma(y+param[1]^(-1)) - lgamma(param[1]^(-1)) - lgamma(y+1) - (y+param[1]^(-1))*log(1+param[1]*exp(X.N%*%param[2:(ncol( X.N)+1)])) + y*log(param[1]*exp(X.N%*%param[2:(ncol(X.N)+1)])) - log(1-(1+param[1]*exp(X.N%*%param[2:(ncol(X.N)+1)]))^(-param[1]^(-1)))) return(ll)} opt <-optim(par=c(1,1), fn = ll.ztnb, control = list(fnscale=-1), method = "BFGS") beta.ztnb <-opt$par[2:(ncol(X.N)+1)] alpha = 1] <- dNBI(y[y >= 1], mu, sigma, log = FALSE)/(1 - dNBI(0, mu, sigma, log = FALSE)) return(out) } density.y <-dztnb(y,mu,sigma) # F.ZTNB pztnb <- function (y,mu,sigma) { out <- (pNBI(y, mu, sigma, lower.tail = TRUE, log.p = FALSE) - (1+mu/sigma)^(-sigma))/(1 - (1+mu/sigma)^(-sigma)) out[y < 1] = 0 return(out) } ##### Figure 4 y1=1:16 density.y1 <-dztnb(y1,mu[1],sigma) distribution.y1 <-pztnb(y1,mu[1],sigma) par(mfrow = c(1, 2)) plot(y1,density.y1, type="h", axes = F, lwd=3, xlab="", ylab="Probability mass function") axis(1, 1:16) axis(2, round(density.y1,3)) plot(y1,distribution.y1, type="s", axes = F, lwd=2, xlab="", ylab="Cumulative distribution function") axis(1, 1:16) axis(2) #######________________ Example 5 # Use your own data called dane.nz exposure=dane.nz$EXPOSURE x=dane.nz$CLAIM_AMOUNT y=dane.nz$CLAIM_COUNT S.variable=x*y ##### Figure 5 par(mfrow = c(1, 3)) hist(x, xlab="The average value of claims", main="") abline(v=mean(x), col="red", lty=2, lwd=3) hist(y, xlab="The number of claims", main="") abline(v=mean(y), col="red", lty=2, lwd=3) hist(S.variable, xlab="Total claim amount", main="") abline(v=mean(S.variable), col="red", lty=2, lwd=3) ### IFM estimation ##Gamma GLM category <-levels(interaction(dane.nz$POWER, dane.nz$SEX, dane.nz$PREMIUM_SPLIT)) bwplot(x ~ as.factor(SEX)|as.factor(POWER)+as.factor(PREMIUM_SPLIT) , data=dane.nz, xlab="", ylab="The average value of claims") X.Y=model.matrix(~as.factor(POWER)+as.factor(SEX)+as.factor(PREMIUM_SPLIT),dane.nz) gamma.glm <- glm(x ~ as.factor(POWER)+as.factor(SEX)+as.factor(PREMIUM_SPLIT), family = Gamma(link = "log"), data=dane.nz) alpha <- gamma.glm$coefficients delta <- summary(gamma.glm)$dispersion mu.g <- as.vector(exp(X.Y %*% alpha)) table(mu.g,as.factor(dane.nz$POWER):as.factor(dane.nz$SEX):as.factor(dane.nz$PREMIUM_SPLIT)) ##### Figure 11 - apendix par(mfrow = c(1, 3)) boxplot(mu.g~as.factor(dane.nz$POWER), xlab="", ylab="Fitted values", main="POWER RANGE") boxplot(mu.g~as.factor(dane.nz$SEX), xlab="", ylab="Fitted values", main="GENDER") boxplot(mu.g~as.factor(dane.nz$PREMIUM_SPLIT), xlab="", ylab="Fitted values", main="PREMIUN SPLIT") ##ZTNB regression X.N=model.matrix(~1,data.frame(y)) ll.ztnb = function(param) { ll=sum(lgamma(y+param[1]^(-1)) - lgamma(param[1]^(-1)) - lgamma(y+1) - (y+param[1]^(-1))*log(1+param[1]*exp(X.N%*%param[2:(ncol(X.N)+1)])) + y*log(param[1]*exp(X.N%*%param[2:(ncol(X.N)+1)])) - log(1-(1+param[1]*exp(X.N%*%param[2:(ncol(X.N)+1)]))^(-param[1]^(-1)))) return(ll)} opt <-optim(par=c(1,1), fn = ll.ztnb, control = list(fnscale=-1), hessian = TRUE) se <-sqrt(diag(ginv(opt$hessian))) beta.ztnb <-opt$par[2:(ncol(X.N)+1)] lambda <-exp(X.N%*%beta.ztnb) sigma <-1/opt$par[1] E.N.ztnb <-exposure*lambda/(1-(1+lambda*opt$par[1])^(-opt$par[1]^(-1))) table(E.N.ztnb,as.factor(dane.nz$POWER):as.factor(dane.nz$SEX):as.factor(dane.nz$PREMIUM_SPLIT)) E.N.ztnb.values <-as.data.frame(table(E.N.ztnb)) ##### Figure 12 - appendix par(mfrow = c(1, 3)) boxplot(E.N.ztnb[exposure<1]~as.factor(dane.nz$POWER[exposure<1]), xlab="", ylab="Fitted values", main="POWER RANGE") boxplot(E.N.ztnb[exposure<1]~as.factor(dane.nz$SEX[exposure<1]), xlab="", ylab="Fitted values", main="GENDER") boxplot(E.N.ztnb[exposure<1]~as.factor(dane.nz$PREMIUM_SPLIT[exposure<1]), xlab="", ylab="Fitted values", main="PREMIUN SPLIT") ####Copula parameter family=4 theta_initial=BiCopEst(rank(x - mu.g)/(length(x) + 1), rank(y - lambda)/(length(y) + 1), family = family)$par tau_initial=BiCopPar2Tau(par = theta_initial, family = family) u1 <- pgam(x, mu.g , delta) u2 <- pztnb(y, lambda, sigma) u3 <- pztnb(y - 1, lambda, sigma) foo <- function(para) { theta0 <- z2theta(para, family) out <- (-sum(log(BiCopHfunc(u1,u2,family, theta0)$hfunc1 - BiCopHfunc(u1, u3, family, theta0)$hfunc1))) return(out) } para_initial <- theta2z(theta_initial, family) para.ifm <- optim(para_initial, foo, method = "BFGS")$par theta.ifm <- z2theta(para_initial, family) tau.ifm <- BiCopPar2Tau(par = theta.ifm, family = family) theta.ifm4=theta.ifm tau.ifm4=tau.ifm #############___________DENSITY ### S_i copula-based density n <- length(S.variable) density.copula.S <- vector(length = n) for (i in 1:n) { y.n <- 1:300 u1 <- pgam(S.variable[i]/y.n, mu.g[i] , delta) u2 <- pztnb(y.n, lambda[i], sigma) u3 <- pztnb(y.n - 1, lambda[i], sigma) par_der <- D_u(u1, u2, theta.ifm, family) - D_u(u1, u3, theta.ifm, family) dummy <- par_der * dgam(S.variable[i]/y.n, mu.g[i], delta)/y.n density.copula.S[i] <- sum(dummy) } #####Expected total claim amount under independence - formula (2) E.Y.gamma <-mu.g E.S <-E.Y.gamma*E.N.ztnb ###### Expectation E[YN] #Monte-Carlo E.S1<-vector(length = length(S.variable)) for (i in 1:length(S.variable)){ S1=BiCopSim(N=1000, family=4, par=theta.ifm4) # Gumbel copula S1.Y=S1[,1] S1.N=S1[,2] Y1=qgamma(S1.Y,mu.g[i],1) N1=qNBI(S1.N,E.N.ztnb[i],sigma) E.S1[i]<-mean(Y1*N1) } ###########______GROUPS cat <-interaction(dane.nz$POWER,dane.nz$SEX, dane.nz$PREMIUM_SPLIT) category <-levels(cat) category.nr<-cat category.factor<-cat for (j in 1:length(category)) {category.nr<-ifelse(category.factor==category[j],j,category.nr)} fit.data <-data.frame(dane.nz$POWER,dane.nz$SEX, dane.nz$PREMIUM_SPLIT,x,y, S.variable, mu.g,lambda, E.S, E.S1, density.copula.S, category.nr, category.factor) ####### Figure 7 par(mfrow=c(1,2)) plot(S.variable,density.copula.S, xlab="", ylab="", main="Copula-based density") plot(density(S.variable)$x[density(S.variable)$x>0],density(S.variable)$y[density(S.variable)$x>0],main="Kernel density", xlab="", ylab="") ####### Figure 8 xyplot(fit.data[,11]~S.variable|fit.data[,13], xlab="", ylab="" ) ####### Figure 9 split.screen(c(2,2)) screen(1,new=TRUE) hist(E.S1, breaks=30, main="Histogram - Gumbel copula", xlab="", ylab="") screen(2,new=TRUE) plot(quantile(E.S1, probs = seq(0,1,0.01),type = 1),type="b", xlab="", ylab="", main="Quantiles - Gumbel copula",col=1) screen(3,new=TRUE) hist(E.S, breaks=30, main="Histogram - independence", xlab="", ylab="") screen(4,new=TRUE) plot(quantile(E.S, probs = seq(0,1,0.01),type = 1),type="b", xlab="", ylab="",col=1, main="Quantiles - independence") ######## Figure 10 bwplot(~E.S1|fit.data[,13], xlab="", ylab="")