overall.data <- read.csv('MLB Stats - Overall.csv')
overall.data <- subset(overall.data,select= c(-X))
overall.data <- ts(overall.data, start=1961)
plot(overall.data,type='o', main="Statistics Over Time in MLB")
train <- subset(overall.data, subset= time(overall.data) < 2010)
test <- subset(overall.data, subset= time(overall.data) >= 2010)
#split data for testing true model
train <- ts(train, start=1961)
test <- ts(test, start=2010)
plot(train,type='o')
HR.data <- train[,"HR"]
hr.model <- lm(HR.data~time(HR.data))
loghr.model <- lm(log(HR.data)~time(HR.data))
summary(hr.model)
##
## Call:
## lm(formula = HR.data ~ time(HR.data))
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.7472 -5.7753 0.2292 6.2471 15.2617
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.956e+03 1.704e+02 -11.48 3.12e-15 ***
## time(HR.data) 9.989e-01 8.585e-02 11.63 1.94e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.499 on 47 degrees of freedom
## Multiple R-squared: 0.7423, Adjusted R-squared: 0.7368
## F-statistic: 135.4 on 1 and 47 DF, p-value: 1.939e-15
summary(loghr.model)
##
## Call:
## lm(formula = log(HR.data) ~ time(HR.data))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.17514 -0.11868 0.09318 0.33522 1.02490
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -92.085281 12.588737 -7.315 2.73e-09 ***
## time(HR.data) 0.047897 0.006342 7.553 1.19e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6278 on 47 degrees of freedom
## Multiple R-squared: 0.5483, Adjusted R-squared: 0.5386
## F-statistic: 57.04 on 1 and 47 DF, p-value: 1.194e-09
hits.data <- train[,"Hits"]
hits.model <- lm(hits.data~time(hits.data))
sqrthits.model <- lm(sqrt(hits.data)~time(hits.data))
summary(hits.model)
##
## Call:
## lm(formula = hits.data ~ time(hits.data))
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.9242 -3.9746 0.3419 4.3349 14.9680
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.271e+03 1.446e+02 -15.71 <2e-16 ***
## time(hits.data) 1.158e+00 7.284e-02 15.90 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.211 on 47 degrees of freedom
## Multiple R-squared: 0.8433, Adjusted R-squared: 0.8399
## F-statistic: 252.8 on 1 and 47 DF, p-value: < 2.2e-16
summary(sqrthits.model)
##
## Call:
## lm(formula = sqrt(hits.data) ~ time(hits.data))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.47287 -0.38823 0.00947 0.37776 1.68321
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.347e+02 1.452e+01 -16.17 <2e-16 ***
## time(hits.data) 1.207e-01 7.313e-03 16.51 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.724 on 47 degrees of freedom
## Multiple R-squared: 0.8529, Adjusted R-squared: 0.8498
## F-statistic: 272.5 on 1 and 47 DF, p-value: < 2.2e-16
runs.data <- train[,"Runs"]
runs.model <- lm(runs.data~time(runs.data))
logruns.model <- lm(log(runs.data)~time(runs.data))
sqrtruns.model <- lm(sqrt(runs.data)~time(runs.data))
summary(runs.model)
##
## Call:
## lm(formula = runs.data ~ time(runs.data))
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.8198 -5.0902 0.5065 4.8049 14.1359
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.253e+03 1.407e+02 -16.02 <2e-16 ***
## time(runs.data) 1.149e+00 7.086e-02 16.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.014 on 47 degrees of freedom
## Multiple R-squared: 0.8484, Adjusted R-squared: 0.8452
## F-statistic: 263 on 1 and 47 DF, p-value: < 2.2e-16
summary(logruns.model)
##
## Call:
## lm(formula = log(runs.data) ~ time(runs.data))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.04933 -0.18499 0.03511 0.28652 0.87553
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.093e+02 9.890e+00 -11.05 1.16e-14 ***
## time(runs.data) 5.657e-02 4.982e-03 11.35 4.54e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4932 on 47 degrees of freedom
## Multiple R-squared: 0.7328, Adjusted R-squared: 0.7271
## F-statistic: 128.9 on 1 and 47 DF, p-value: 4.544e-15
summary(sqrtruns.model)
##
## Call:
## lm(formula = sqrt(runs.data) ~ time(runs.data))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.90230 -0.55305 0.04637 0.44200 1.58732
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.329e+02 1.468e+01 -15.87 <2e-16 ***
## time(runs.data) 1.198e-01 7.393e-03 16.20 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7319 on 47 degrees of freedom
## Multiple R-squared: 0.8482, Adjusted R-squared: 0.8449
## F-statistic: 262.5 on 1 and 47 DF, p-value: < 2.2e-16
SO.data <- train[,"SO"]
so.model <- lm(SO.data~time(SO.data))
logso.model <- lm(log(SO.data)~time(SO.data))
summary(so.model)
##
## Call:
## lm(formula = SO.data ~ time(SO.data))
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.5354 -2.9292 0.3088 1.8923 14.0482
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.842e+03 9.838e+01 -18.72 <2e-16 ***
## time(SO.data) 9.405e-01 4.956e-02 18.98 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.906 on 47 degrees of freedom
## Multiple R-squared: 0.8846, Adjusted R-squared: 0.8821
## F-statistic: 360.1 on 1 and 47 DF, p-value: < 2.2e-16
summary(logso.model)
##
## Call:
## lm(formula = log(SO.data) ~ time(SO.data))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.69738 -0.25659 0.05418 0.17767 1.06303
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.007e+02 9.755e+00 -10.32 1.15e-13 ***
## time(SO.data) 5.221e-02 4.914e-03 10.62 4.39e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4865 on 47 degrees of freedom
## Multiple R-squared: 0.706, Adjusted R-squared: 0.6997
## F-statistic: 112.9 on 1 and 47 DF, p-value: 4.391e-14
par(mfrow=c(2,2))
plot(HR.data,type='o');abline(hr.model)
plot(hits.data,type='o');abline(hits.model)
plot(runs.data,type='o');abline(runs.model)
plot(SO.data,type='o');abline(so.model)
coeftest(hr.model, vcov=vcovHAC(hr.model))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1956.01684 226.75817 -8.6260 3.013e-11 ***
## time(HR.data) 0.99888 0.11445 8.7278 2.136e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(hits.model, vcov=vcovHAC(hits.model))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.2713e+03 1.6068e+02 -14.136 < 2.2e-16 ***
## time(hits.data) 1.1583e+00 8.1379e-02 14.233 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(runs.model, vcov=vcovHAC(runs.model))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.2534e+03 1.6373e+02 -13.763 < 2.2e-16 ***
## time(runs.data) 1.1492e+00 8.2857e-02 13.869 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(so.model, vcov=vcovHAC(so.model))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.8419e+03 1.2405e+02 -14.848 < 2.2e-16 ***
## time(SO.data) 9.4051e-01 6.2183e-02 15.125 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2))
plot(rstandard(hr.model),type='o')
plot(rstandard(hits.model),type='o')
plot(rstandard(runs.model),type='o')
plot(rstandard(so.model),type='o')
hrres <- rstandard(hr.model)
hitsres <- rstandard(sqrthits.model)
runsres <- rstandard(sqrtruns.model)
sores <- rstandard(logso.model)
par(mfrow= c(2,2))
plot(hrres,type='o')
plot(hitsres,type='o')
plot(runsres,type='o')
plot(sores,type='o')
par(mfrow=c(2,2))
qqnorm(hrres);qqline(hrres)
qqnorm(hitsres);qqline(hitsres)
qqnorm(runsres);qqline(runsres)
qqnorm(sores);qqline(sores)
par(mfrow=c(2,2))
hist(hrres)
hist(hitsres)
hist(runsres)
hist(sores)
shapiro.test(hrres)
##
## Shapiro-Wilk normality test
##
## data: hrres
## W = 0.97858, p-value = 0.5074
shapiro.test(hitsres)
##
## Shapiro-Wilk normality test
##
## data: hitsres
## W = 0.98146, p-value = 0.6277
shapiro.test(runsres)
##
## Shapiro-Wilk normality test
##
## data: runsres
## W = 0.98754, p-value = 0.8795
shapiro.test(sores)
##
## Shapiro-Wilk normality test
##
## data: sores
## W = 0.88791, p-value = 0.0002265
par(mfrow=c(2,2))
acf(hrres)
acf(hitsres)
acf(runsres)
acf(sores)
par(mfrow=c(2,2))
runs(hrres)
## $pvalue
## [1] 0.401
##
## $observed.runs
## [1] 22
##
## $expected.runs
## [1] 25.40816
##
## $n1
## [1] 23
##
## $n2
## [1] 26
##
## $k
## [1] 0
runs(hitsres)
## $pvalue
## [1] 1
##
## $observed.runs
## [1] 25
##
## $expected.runs
## [1] 25.4898
##
## $n1
## [1] 24
##
## $n2
## [1] 25
##
## $k
## [1] 0
runs(runsres)
## $pvalue
## [1] 0.829
##
## $observed.runs
## [1] 24
##
## $expected.runs
## [1] 25.2449
##
## $n1
## [1] 22
##
## $n2
## [1] 27
##
## $k
## [1] 0
runs(sores)
## $pvalue
## [1] 0.0398
##
## $observed.runs
## [1] 17
##
## $expected.runs
## [1] 24.26531
##
## $n1
## [1] 19
##
## $n2
## [1] 30
##
## $k
## [1] 0
par(mfrow=c(2,2))
plot(hrres, type='o')
plot(hitsres, type='o')
plot(runsres, type='o')
plot(sores, type='o')
adf.test(hrres)
##
## Augmented Dickey-Fuller Test
##
## data: hrres
## Dickey-Fuller = -2.735, Lag order = 3, p-value = 0.28
## alternative hypothesis: stationary
adf.test(hitsres)
##
## Augmented Dickey-Fuller Test
##
## data: hitsres
## Dickey-Fuller = -2.0007, Lag order = 3, p-value = 0.5739
## alternative hypothesis: stationary
adf.test(runsres)
##
## Augmented Dickey-Fuller Test
##
## data: runsres
## Dickey-Fuller = -2.4859, Lag order = 3, p-value = 0.3797
## alternative hypothesis: stationary
adf.test(sores)
##
## Augmented Dickey-Fuller Test
##
## data: sores
## Dickey-Fuller = -3.2888, Lag order = 3, p-value = 0.08391
## alternative hypothesis: stationary
pp.test(hrres)
## Warning in pp.test(hrres): p-value smaller than printed p-value
##
## Phillips-Perron Unit Root Test
##
## data: hrres
## Dickey-Fuller Z(alpha) = -32.301, Truncation lag parameter = 3, p-value
## = 0.01
## alternative hypothesis: stationary
pp.test(hitsres)
## Warning in pp.test(hitsres): p-value smaller than printed p-value
##
## Phillips-Perron Unit Root Test
##
## data: hitsres
## Dickey-Fuller Z(alpha) = -37.906, Truncation lag parameter = 3, p-value
## = 0.01
## alternative hypothesis: stationary
pp.test(runsres)
## Warning in pp.test(runsres): p-value smaller than printed p-value
##
## Phillips-Perron Unit Root Test
##
## data: runsres
## Dickey-Fuller Z(alpha) = -35.665, Truncation lag parameter = 3, p-value
## = 0.01
## alternative hypothesis: stationary
pp.test(sores)
##
## Phillips-Perron Unit Root Test
##
## data: sores
## Dickey-Fuller Z(alpha) = -24.699, Truncation lag parameter = 3, p-value
## = 0.01344
## alternative hypothesis: stationary
kpss.test(hrres)
## Warning in kpss.test(hrres): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: hrres
## KPSS Level = 0.12324, Truncation lag parameter = 3, p-value = 0.1
kpss.test(hitsres)
## Warning in kpss.test(hitsres): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: hitsres
## KPSS Level = 0.08834, Truncation lag parameter = 3, p-value = 0.1
kpss.test(runsres)
## Warning in kpss.test(runsres): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: runsres
## KPSS Level = 0.070726, Truncation lag parameter = 3, p-value = 0.1
kpss.test(sores)
## Warning in kpss.test(sores): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: sores
## KPSS Level = 0.12208, Truncation lag parameter = 3, p-value = 0.1
par(mfrow=c(2,2))
acf(hrres)
acf(hitsres)
acf(runsres)
acf(sores)
par(mfrow=c(2,2))
pacf(hrres)
pacf(hitsres)
pacf(runsres)
pacf(sores)
print("HR")
## [1] "HR"
eacf(hrres)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o o o o o o o o o o o o o
## 1 x o o o o o o o o o o o o o
## 2 x o o o o o o o o o o o o o
## 3 o o o o o o o o o o o o o o
## 4 o o o o o o o o o o o o o o
## 5 x x o o o o o o o o o o o o
## 6 x x o o o o o o o o o o o o
## 7 x x o o o o o o o o o o o o
print("Hits")
## [1] "Hits"
eacf(hitsres)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o o o o o o o x o x x o o
## 1 x o o o o o o o o o o o o o
## 2 x o o o o o o o o o o o o o
## 3 o x o o o o o o o o o o o o
## 4 o x o o o o o o o o o o o o
## 5 x o o o o o o o o o o o o o
## 6 x o o o o o o o o o o o o o
## 7 o o x o o o o o o o o o o o
print("Runs")
## [1] "Runs"
eacf(runsres)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 o o o o o o o o o o x o o o
## 1 o o o o o o o o o o o o o o
## 2 x o o o o o o o o o o o o o
## 3 o o o o o o o o o o o o o o
## 4 o o o o o o o o o o o o o o
## 5 x o o o o o o o o o o o o o
## 6 o o o o o o o o o o o o o o
## 7 o o x o o o o o o o o o o o
print("SO")
## [1] "SO"
eacf(sores)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o o o o o o o o o o o o o
## 1 x o o o o o o o o o o o o o
## 2 x o o o o o o o o o o o o o
## 3 x o o o o o o o o o o o o o
## 4 x o o o o o o o o o o o o o
## 5 o o o o o o o o o o o o o o
## 6 x o x o o o o o o o o o o o
## 7 x o x o o o o o o o o o o o
For HR: IMA(1,1) or ARI(3,1)
For Hits: IMA(1,1) or ARI(3,1)
For Runs:
ARI(1,1)
For SO:
ARI(5,1) or IMA(1,1)
hr.MA <- Arima(hrres, c(0,1,1))
hr.AR3 <- Arima(hrres, c(3,1,0))
hr.MA
## Series: hrres
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## -0.6334
## s.e. 0.1478
##
## sigma^2 = 1.018: log likelihood = -68.29
## AIC=140.58 AICc=140.84 BIC=144.32
hr.AR3
## Series: hrres
## ARIMA(3,1,0)
##
## Coefficients:
## ar1 ar2 ar3
## -0.5507 -0.3258 -0.1511
## s.e. 0.1416 0.1551 0.1410
##
## sigma^2 = 1.082: log likelihood = -68.63
## AIC=145.27 AICc=146.2 BIC=152.75
hits.MA <- Arima(hitsres, c(0,1,1))
hits.AR <- Arima(hitsres, c(3,1,0))
hits.MA
## Series: hitsres
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## -0.6153
## s.e. 0.1201
##
## sigma^2 = 0.9927: log likelihood = -67.66
## AIC=139.33 AICc=139.6 BIC=143.07
hits.AR
## Series: hitsres
## ARIMA(3,1,0)
##
## Coefficients:
## ar1 ar2 ar3
## -0.6524 -0.3704 -0.1078
## s.e. 0.1500 0.1682 0.1483
##
## sigma^2 = 1.026: log likelihood = -67.41
## AIC=142.83 AICc=143.76 BIC=150.31
runs.AR <- Arima(runsres, c(1,1,0))
runs.AR
## Series: runsres
## ARIMA(1,1,0)
##
## Coefficients:
## ar1
## -0.4009
## s.e. 0.1310
##
## sigma^2 = 1.249: log likelihood = -73.03
## AIC=150.05 AICc=150.32 BIC=153.8
so.AR <- Arima(sores, c(5,1,0))
so.MA <- Arima(sores, c(0,1,1))
so.AR
## Series: sores
## ARIMA(5,1,0)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5
## -0.3555 -0.0444 -0.1564 -0.1196 0.1052
## s.e. 0.1500 0.1709 0.1761 0.1764 0.1566
##
## sigma^2 = 0.8642: log likelihood = -62.13
## AIC=136.25 AICc=138.3 BIC=147.48
so.MA
## Series: sores
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## -0.3507
## s.e. 0.1490
##
## sigma^2 = 0.8323: log likelihood = -63.27
## AIC=130.53 AICc=130.8 BIC=134.27
keep hr.MA
, hits.MA
, runs.AR
,
so.MA
tsdiag(hr.MA)
tsdiag(hits.MA)
tsdiag(runs.AR)
tsdiag(so.MA)
hr.overfit <- Arima(hrres, c(0,1,2))
hits.overfit <- Arima(hitsres, c(0,1,2))
runs.overfit <- Arima(runsres, c(2,1,0))
so.overfit <- Arima(sores, c(0,1,2))
data.frame(
Model= c("hr.MA","hits.MA","runs.AR","so.MA","hr.overfit","hits.overfit","runs.overfit","so.overfit"),
AIC= c(hr.MA$aic, hits.MA$aic, runs.AR$aic, so.MA$aic, hr.overfit$aic, hits.overfit$aic, runs.overfit$aic, so.overfit$aic),
AICC= c(hr.MA$aicc, hits.MA$aicc, runs.AR$aicc, so.MA$aicc, hr.overfit$aicc, hits.overfit$aicc, runs.overfit$aicc, so.overfit$aicc),
BIC= c(hr.MA$bic, hits.MA$bic, runs.AR$bic, so.MA$bic, hr.overfit$bic, hits.overfit$bic, runs.overfit$bic, so.overfit$bic)
)
## Model AIC AICC BIC
## 1 hr.MA 140.5766 140.8432 144.3190
## 2 hits.MA 139.3295 139.5962 143.0719
## 3 runs.AR 150.0535 150.3202 153.7959
## 4 so.MA 130.5302 130.7969 134.2726
## 5 hr.overfit 141.9884 142.5338 147.6020
## 6 hits.overfit 141.1190 141.6645 146.7326
## 7 runs.overfit 148.5819 149.1273 154.1955
## 8 so.overfit 132.4430 132.9884 138.0566
#HR
HR.MA1_xreg=Arima(HR.data,order=c(0,1,1),xreg=1:length(HR.data))
newtm=seq(from=length(HR.data)+1,to=length(HR.data)+10,length=10)
predx=predict(HR.MA1_xreg,n.ahead=10,newxreg=newtm)
pr=predx$pred
uci=pr+2*predx$se
lci=pr-2*predx$se
# To plot the predicted values as prediction intervals, code them as time series
pr=ts(pr,start=2010,freq=1)
uci=ts(uci,start=2010,freq=1)
lci=ts(lci,start=2010,freq=1)
ymin=min(c(as.vector(lci),HR.data))-.1
ymax=max(c(as.vector(uci),HR.data))+.1
par(mfrow=c(1,1))
plot(HR.data,xlim=c(1961,2020),ylim=c(ymin,ymax),main="HR.data")
lines(test[,"HR"], type='o')
lines(pr,col=2)
lines(uci,col=3)
lines(lci,col=3)
#Hits
hits.MA1_xreg=Arima(hits.data,order=c(0,1,1),xreg=1:length(hits.data))
newtm=seq(from=length(hits.data)+1,to=length(hits.data)+10,length=10)
predx=predict(hits.MA1_xreg,n.ahead=10,newxreg=newtm)
pr=predx$pred
uci=pr+2*predx$se
lci=pr-2*predx$se
# To plot the predicted values as prediction intervals, code them as time series
pr=ts(pr,start=2010,freq=1)
uci=ts(uci,start=2010,freq=1)
lci=ts(lci,start=2010,freq=1)
ymin=min(c(as.vector(lci),hits.data))-.1
ymax=max(c(as.vector(uci),hits.data))+.1
par(mfrow=c(1,1))
plot(hits.data,xlim=c(1961,2020),ylim=c(ymin,ymax),main="Hits.data")
lines(test[,"Hits"], type='o')
lines(pr,col=2)
lines(uci,col=3)
lines(lci,col=3)
#Runs
runs.AR1_xreg=Arima(runs.data,order=c(1,1,0),xreg=1:length(runs.data))
newtm=seq(from=length(runs.data)+1,to=length(runs.data)+10,length=10)
predx=predict(runs.AR1_xreg,n.ahead=10,newxreg=newtm)
pr=predx$pred
uci=pr+2*predx$se
lci=pr-2*predx$se
# To plot the predicted values as prediction intervals, code them as time series
pr=ts(pr,start=2010,freq=1)
uci=ts(uci,start=2010,freq=1)
lci=ts(lci,start=2010,freq=1)
ymin=min(c(as.vector(lci),runs.data))-.1
ymax=max(c(as.vector(uci),runs.data))+.1
par(mfrow=c(1,1))
plot(runs.data,xlim=c(1961,2020),ylim=c(ymin,ymax),main="runs.data")
lines(test[,"Runs"], type='o')
lines(pr,col=2)
lines(uci,col=3)
lines(lci,col=3)
#SO
SO.MA1_xreg=Arima(SO.data,order=c(0,1,1),xreg=1:length(SO.data))
newtm=seq(from=length(SO.data)+1,to=length(SO.data)+10,length=10)
predx=predict(SO.MA1_xreg,n.ahead=10,newxreg=newtm)
pr=predx$pred
uci=pr+2*predx$se
lci=pr-2*predx$se
# To plot the predicted values as prediction intervals, code them as time series
pr=ts(pr,start=2010,freq=1)
uci=ts(uci,start=2010,freq=1)
lci=ts(lci,start=2010,freq=1)
ymin=min(c(as.vector(lci),SO.data))-.1
ymax=max(c(as.vector(uci),SO.data))+.1
par(mfrow=c(1,1))
plot(SO.data,xlim=c(1961,2020),ylim=c(ymin,ymax),main="SO.data")
lines(test[,"SO"], type='o')
lines(pr,col=2)
lines(uci,col=3)
lines(lci,col=3)