This is an R Markdown document. Markdown is a simple formatting syntax for authoring web pages (click the MD toolbar button for help on Markdown).
When you click the Knit HTML button in RSTUDIO a web page will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# read in relevant packages for workshop - packages should be installed
# first
library("pxR") # pxr for reading .px files
## Loading required package: stringr
library("ggplot2") # plotting package
library("reshape") # contains useful functions for reshaping data
## Loading required package: plyr
##
## Attaching package: 'reshape'
##
## The following objects are masked from 'package:plyr':
##
## rename, round_any
library("forecast") # some time series functions we will use
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Loading required package: timeDate
## This is forecast 5.1
Now to simply read in a .px file into the R environment use the command read.px
The as.data.frame function will convert the .px object to a data frame for later use.
The head command displays the first few lines of the frame.
It is useful to keep the .px object in memory - say to examine patterns in codes which we will do later.
We print out the codes, as the pattern in the codes will be useful for selecting welfare office or counties later in the program.
LRM30.px.object <- read.px("http://www.cso.ie/px/pxeirestat/Database/eirestat/Live%20Register%20Detailed%20Flow%20Analysis/LRM30.px")
LRM30.px <- as.data.frame(LRM30.px.object)
head(LRM30.px)
## Age.Group Sex Statistic
## 1 Under 20 years Both sexes Live register (Persons)
## 2 20 - 24 years Both sexes Live register (Persons)
## 3 Under 25 years Both sexes Live register (Persons)
## 4 25 - 34 years Both sexes Live register (Persons)
## 5 25 years and over Both sexes Live register (Persons)
## 6 35 - 44 years Both sexes Live register (Persons)
## Social.Welfare.Office Month dat
## 1 State 2011M04 13730
## 2 State 2011M04 65570
## 3 State 2011M04 79300
## 4 State 2011M04 137260
## 5 State 2011M04 359410
## 6 State 2011M04 97520
LRM30.px.CODES <- LRM30.px.object$CODES
LRM30.px.LABELS <- LRM30.px.object$VALUES
# LRM30.px.CODES
LRM30.px.CODES$Social.Welfare.Office
## [1] "-" "01" "0101" "0102" "0103" "32" "3201" "3204" "3202" "3203"
## [11] "16" "1601" "1602" "1603" "1604" "18" "1801" "1802" "1803" "1804"
## [21] "1817" "1805" "1806" "17" "1807" "1808" "1809" "1810" "1811" "1812"
## [31] "1813" "1814" "1815" "1816" "33" "3301" "3302" "3303" "3304" "3306"
## [41] "3307" "3308" "3309" "3305" "8000" "0207" "0401" "0210" "0208" "0212"
## [51] "0213" "0302" "0214" "0501" "0211" "0201" "0209" "0216" "0205" "0204"
## [61] "0303" "0402" "0301" "0206" "0215" "0203" "0202" "27" "2701" "2702"
## [71] "26" "2703" "2704" "2705" "2706" "19" "1901" "1902" "1907" "1903"
## [81] "1904" "1905" "1906" "06" "0601" "0602" "0603" "0604" "07" "0701"
## [91] "0702" "0703" "0704" "08" "0801" "0802" "0803" "28" "2801" "2802"
## [101] "21" "2101" "20" "2102" "09" "0901" "0902" "10" "1001" "1002"
## [111] "1003" "29" "2901" "2902" "2903" "2904" "2905" "2906" "2907" "2908"
## [121] "11" "1101" "1102" "1103" "34" "3401" "3402" "3403" "3404" "22"
## [131] "2201" "2202" "2203" "12" "1201" "1202" "1203" "30" "3001" "3002"
## [141] "3003" "31" "3101" "3102" "23" "2301" "2302" "2303" "2304" "2305"
## [151] "25" "2501" "2502" "24" "13" "1301" "1302" "1303" "14" "1401"
## [161] "1402" "1403" "1404" "15" "1501" "1502" "1503" "1504" "1505"
LRM30.px.LABELS$Social.Welfare.Office
## [1] "State" "Carlow County"
## [3] "Muine Bheag (Bagenalstown)" "Carlow"
## [5] "Tullow" "Cavan County"
## [7] "Belturbet" "Ballyconnell"
## [9] "Cavan" "Bailieboro"
## [11] "Clare County" "Ennis"
## [13] "Ennistymon" "Kilrush"
## [15] "Tulla" "Cork County"
## [17] "Bandon" "Bantry"
## [19] "Bantry (SWLO)" "Castletownbere"
## [21] "Carrigaline" "Clonakilty"
## [23] "Cobh" "Cork City"
## [25] "Dunmanway" "Fermoy"
## [27] "Kinsale" "Macroom"
## [29] "Mallow" "Midleton"
## [31] "Newmarket" "Passage West"
## [33] "Skibbereen" "Youghal"
## [35] "Donegal County" "Ballybofey"
## [37] "Ballyshannon" "Buncrana"
## [39] "Donegal" "Dunfanaghy"
## [41] "Dungloe" "Killybegs"
## [43] "Letterkenny" "Donegal Control"
## [45] "Dublin County" "Apollo House (Tara Street)"
## [47] "Balbriggan" "Ballyfermot"
## [49] "Ballymun" "Bishop Square"
## [51] "Blanchardstown" "Clondalkin"
## [53] "Coolock" "Dun Laoghaire"
## [55] "Finglas" "Gardiner Street"
## [57] "Kilbarrack" "Kings Inn Street"
## [59] "Navan Road" "Nth Cumberland Street"
## [61] "Nutgrove (Rathfarnham)" "Swords"
## [63] "Tallaght" "Thomas Street"
## [65] "Townsend Street" "Victoria Street"
## [67] "Werburg Street" "Galway County"
## [69] "Ballinasloe" "Clifden"
## [71] "Galway City" "Gort"
## [73] "Loughrea" "Oughterard"
## [75] "Tuam" "Kerry County"
## [77] "Caherciveen" "Dingle"
## [79] "Kenmare" "Killarney"
## [81] "Killorglin" "Listowel"
## [83] "Tralee" "Kildare County"
## [85] "Athy" "Kildare Town"
## [87] "Maynooth" "Newbridge"
## [89] "Kilkenny County" "Callan"
## [91] "Kilkenny" "Thomastown"
## [93] "Castlecomer" "Laoighis County"
## [95] "Portarlington" "Portlaoise"
## [97] "Rathdowney" "Leitrim County"
## [99] "Carrick-On-Shannon" "Manorhamilton"
## [101] "Limerick County" "Kilmallock"
## [103] "Limerick City" "Newcastle West"
## [105] "Longford County" "Granard"
## [107] "Longford" "Louth County"
## [109] "Ardee" "Drogheda"
## [111] "Dundalk" "Mayo County"
## [113] "Achill" "Ballina"
## [115] "Ballinrobe" "Belmullet"
## [117] "Castlebar" "Claremorris"
## [119] "Swinford" "Westport"
## [121] "Meath County" "Kells"
## [123] "Navan" "Trim"
## [125] "Monaghan County" "Carrickmacross"
## [127] "Castleblayney" "Clones"
## [129] "Monaghan" "North Tipperary"
## [131] "Nenagh" "Roscrea"
## [133] "Thurles" "Offaly County"
## [135] "Birr" "Edenderry"
## [137] "Tullamore" "Roscommon County"
## [139] "Boyle" "Castlerea"
## [141] "Roscommon" "Sligo County"
## [143] "Sligo" "Tubbercurry"
## [145] "South Tipperary" "Cahir"
## [147] "Carrick-On-Suir" "Cashel"
## [149] "Clonmel" "Tipperary"
## [151] "Waterford County" "Dungarvan"
## [153] "Lismore" "Waterford City"
## [155] "Westmeath County" "Athlone"
## [157] "Castlepollard" "Mullingar"
## [159] "Wexford County" "Enniscorthy"
## [161] "Gorey" "New Ross"
## [163] "Wexford" "Wicklow County"
## [165] "Arklow" "Baltinglass"
## [167] "Blessington" "Bray"
## [169] "Wicklow"
We will now set up a function that we can use to reshape our dataset.
We then execute the function to reshape our dataset and also pick up the first timepoint and last timepoint into a variable for later use.
reshapeLRM30 <- function(px.object) {
datasetwcodes <- as.data.frame(px.object, use.codes = TRUE)
outds <- subset(datasetwcodes, Statistic == "LRM30C1")
outds$LRM30C1 <- outds$dat
outdstemp <- subset(datasetwcodes, Statistic == "LRM30C2")
outds$LRM30C2 <- outdstemp$dat
outdstemp <- subset(datasetwcodes, Statistic == "LRM30C3")
outds$LRM30C3 <- outdstemp$dat
outdstemp <- subset(datasetwcodes, Statistic == "LRM30C4")
outds$LRM30C4 <- outdstemp$dat
outdstemp <- subset(datasetwcodes, Statistic == "LRM30C5")
outds$LRM30C5 <- outdstemp$dat
outds <- outds[c("LRM30C1", "LRM30C2", "LRM30C3", "LRM30C4", "LRM30C5",
"Social.Welfare.Office", "Age.Group", "Sex", "Month")]
return(outds)
}
LRM30.px.data <- reshapeLRM30(LRM30.px.object)
LRM30.timepoint.first <- c(as.numeric(substr(LRM30.px.data$Month[1], 1, 4)),
as.numeric(substr(LRM30.px.data$Month[1], 6, 7)))
LRM30.timepoint.last <- c(as.numeric(substr(LRM30.px.data$Month[length(LRM30.px.data$Month)],
1, 4)), as.numeric(substr(LRM30.px.data$Month[length(LRM30.px.data$Month)],
6, 7)))
LRM30.timepoint.last
## [1] 2013 10
# str(LRM30.px.data)
Next we want to make sure and add in the labels to the factors
head(LRM30.px.data)
## LRM30C1 LRM30C2 LRM30C3 LRM30C4 LRM30C5 Social.Welfare.Office Age.Group
## 1 13730 NA NA NA NA - 355
## 2 65570 NA NA NA NA - 365
## 3 79300 NA NA NA NA - 405
## 4 137260 NA NA NA NA - 415
## 5 359410 NA NA NA NA - 430
## 6 97520 NA NA NA NA - 465
## Sex Month
## 1 - 2011M04
## 2 - 2011M04
## 3 - 2011M04
## 4 - 2011M04
## 5 - 2011M04
## 6 - 2011M04
# LRM30.px.LABELS$Social.Welfare.Office LRM30.px.CODES$Social.Welfare.Office
LRM30.px.data$Social.Welfare.Office <- factor(LRM30.px.data$Social.Welfare.Office,
levels = c(LRM30.px.CODES$Social.Welfare.Office), labels = c(LRM30.px.LABELS$Social.Welfare.Office))
LRM30.px.data$Sex <- factor(LRM30.px.data$Sex, levels = c(LRM30.px.CODES$Sex),
labels = c(LRM30.px.LABELS$Sex))
LRM30.px.data$Age.Group <- factor(LRM30.px.data$Age.Group, levels = c(LRM30.px.CODES$Age.Group),
labels = c(LRM30.px.LABELS$Age.Group))
# head(LRM30.px.data)
The next step creates a function that we can use to easily make selections from LRM30
The function also calculates rates for Joiners, Stayers etc that can be used for comparing across counties and Welfare Offices. Note it is a crude estimate of the rate in that it uses the current stock as the denominator.
selectLRM30data <- function(dataset, LRMLWO, LRMAge, LRMSex) {
# This section now needed to convert codes/levels to labels LRMLWO <- c('-')
# for testing
tlabel <- NULL
tlabel
tlabel$codes <- LRM30.px.CODES$Social.Welfare.Office
tlabel$labels <- LRM30.px.LABELS$Social.Welfare.Office
LRMLWOLabels <- subset(as.data.frame(tlabel), codes %in% c(LRMLWO), select = c(labels))
LRMLWOLabels
# LRMAge <- c('-') for testing
tlabel <- NULL
tlabel
tlabel$codes <- LRM30.px.CODES$Age.Group
tlabel$labels <- LRM30.px.LABELS$Age.Group
LRMAgeLabels <- subset(as.data.frame(tlabel), codes %in% c(LRMAge), select = c(labels))
LRMAgeLabels
# LRMSex <- c('-') for testing
tlabel <- NULL
tlabel
tlabel$codes <- LRM30.px.CODES$Sex
tlabel$labels <- LRM30.px.LABELS$Sex
LRMSexLabels <- subset(as.data.frame(tlabel), codes %in% c(LRMSex), select = c(labels))
LRMSexLabels
outds <- subset(dataset, Age.Group %in% LRMAgeLabels$labels & Social.Welfare.Office %in%
LRMLWOLabels$labels & Sex %in% LRMSexLabels$labels)
outds$JoinRate <- outds$LRM30C3/outds$LRM30C1
outds$LeaveRate <- outds$LRM30C4/outds$LRM30C1
outds$StayRate <- outds$LRM30C2/outds$LRM30C1
outds$P35EmployRate <- outds$LRM30C5/outds$LRM30C1
return(outds)
}
Next we want to subset our data and create time series objects for high level, state, all sexes, Live Register.
If we want to read in Males oc a certain age, for Kenmare Welfare Office all we simply have to check the PC-Axis file or object for the relevant codes and pass those codes to the function when creating the chartdata.
chartdata <- selectLRM30data(LRM30.px.data, c("-"), c("-"), c("-"))
# chartdata
ts.LRM30C1 <- ts(chartdata$LRM30C1, start = LRM30.timepoint.first, end = LRM30.timepoint.last,
frequency = 12)
ts.LRM30C2 <- ts(chartdata$LRM30C2, start = LRM30.timepoint.first, end = LRM30.timepoint.last,
frequency = 12)
ts.LRM30C3 <- ts(chartdata$LRM30C3, start = LRM30.timepoint.first, end = LRM30.timepoint.last,
frequency = 12)
ts.LRM30C4 <- ts(chartdata$LRM30C4, start = LRM30.timepoint.first, end = LRM30.timepoint.last,
frequency = 12)
ts.LRM30C5 <- ts(chartdata$LRM30C5, start = LRM30.timepoint.first, end = LRM30.timepoint.last,
frequency = 12)
ts.StayRate <- ts(chartdata$StayRate, start = LRM30.timepoint.first, end = LRM30.timepoint.last,
frequency = 12)
ts.LeaveRate <- ts(chartdata$LeaveRate, start = LRM30.timepoint.first, end = LRM30.timepoint.last,
frequency = 12)
ts.JoinRate <- ts(chartdata$JoinRate, start = LRM30.timepoint.first, end = LRM30.timepoint.last,
frequency = 12)
ts.JoinRate
## Jan Feb Mar Apr May Jun Jul Aug
## 2011 NA 0.07533 0.09724 0.09651 0.05742
## 2012 0.07322 0.06383 0.06655 0.05284 0.09503 0.09029 0.08712 0.05284
## 2013 0.06922 0.06302 0.06889 0.05489 0.08962 0.09149 0.07616 0.06579
## Sep Oct Nov Dec
## 2011 0.10962 0.03683 0.06764 0.07122
## 2012 0.06482 0.06826 0.06594 0.06951
## 2013 0.06595 0.07100
Now that we have created time series objects with our data, we can now fit basic models using the forecast package.
fit.LRM30C1 <- stl(ts.LRM30C1, s.window = "period", na.action = na.omit)
fit.LRM30C2 <- stl(ts.LRM30C2, s.window = "period", na.action = na.omit)
fit.LRM30C3 <- stl(ts.LRM30C3, s.window = "period", na.action = na.omit)
fit.LRM30C4 <- stl(ts.LRM30C4, s.window = "period", na.action = na.omit)
# fit.LRM30C5 <- stl(ts.LRM30C5,s.window='period',na.action=na.omit)
fit.StayRate <- stl(ts.StayRate, s.window = "period", na.action = na.omit)
fit.LeaveRate <- stl(ts.LeaveRate, s.window = "period", na.action = na.omit)
fit.JoinRate <- stl(ts.JoinRate, s.window = "period", na.action = na.omit)
# attributes(fit.LRM30C1$time.series)
Once we have created a time series model it is now quite easy to plot it.
We can simply pass the model directly to the plot command and see the default plot. LRM30C1 refers to teh Live Register stock figures.
plot(fit.LRM30C1)
Alternatively we can take a little more control of the plot in terms of what we are looking at. In this case we will look at Live register and Stayers for both actual figures and trend.
plot.ts(ts.LRM30C1, type = "n", xaxt = "n", ylim = c(min(ts.LRM30C1, ts.LRM30C2,
na.rm = TRUE), max(ts.LRM30C1, ts.LRM30C2, na.rm = TRUE)), main = "Live register and Stayers",
ylab = "Persons", xlab = "month")
axis(1, at = time(ts.LRM30C1), tck = 0.02, labels = LRM30.px.LABELS$Month)
axis(3, at = time(ts.LRM30C3), tck = 0.02, labels = FALSE)
lines(ts.LRM30C1, col = "blue")
lines(ts.LRM30C2, col = "red")
lines(fit.LRM30C1$time.series[, 2], col = "blue") #trend
lines(fit.LRM30C2$time.series[, 2], col = "red") #trend
We can also look at the movers (Joiners and Leavers) in a similar way
plot(ts.LRM30C3, type = "n", xaxt = "n", ylim = c(min(-ts.LRM30C4, na.rm = TRUE),
max(ts.LRM30C3, na.rm = TRUE)), main = "Joiners (Blue), Leavers (Red negative) and Net joiners (Green)\n Actual + Trend",
ylab = "Persons", xlab = "month")
axis(1, at = time(ts.LRM30C3), tck = 0.02, labels = LRM30.px.LABELS$Month)
axis(3, at = time(ts.LRM30C3), tck = 0.02, labels = FALSE) #,labels=LRM30.px.LABELS$Month) # Labels over lap with title
abline(h = 0, col = "grey")
lines(ts.LRM30C3, col = "blue")
lines(-ts.LRM30C4, col = "red")
lines(fit.LRM30C3$time.series[, 2], col = "blue")
lines(-fit.LRM30C4$time.series[, 2], col = "red")
lines(ts.LRM30C3 - ts.LRM30C4, col = "dark green")
lines(fit.LRM30C3$time.series[, 2] - fit.LRM30C4$time.series[, 2], col = "dark green")
Now that we have the trend it might be useful to see how the Liver Register in a dynamic way.
We will next attempt to show this in a graph to show how the dynamics (joiners and leavers) of each county compare, while also including a reference to the size of teh LR in each county.
To do this we must first create our dataset.
The dataset we will require will be based on the county estimates, these can be selected by taking those Welfare Office codes that have a length of 2 digits, this is a hierarchical type variable with codes of length representing counties and those of length 4 representing Welfare Offices (the first two codes in the welfare office codes denote the county the office is associated with). This is where there are advantages in knowing the structure of your dataset.
Once we have selected this dataset, we determine the rank of each county on a number of variables variable. This may be useful for other analyses.
LWOds <- NULL
LWOdsCounties <- NULL
# LWOds
LWOds$codes <- LRM30.px.CODES$Social.Welfare.Office
LWOds$labels <- LRM30.px.LABELS$Social.Welfare.Office
LWOds$ncharcodes <- nchar(LWOds$codes)
# LWOds
LWOdsCounties <- subset(as.data.frame(LWOds), ncharcodes == 2 | codes == "8000",
select = c(codes)) #ncharcodes==2)
# head(LWOdsCounties)
# LWOdsCounties
CountyCodes <- as.character(LWOdsCounties$codes)
# CountyCodes
LRM30CountyTotsdata <- NULL
LRM30CountyTotsdata <- selectLRM30data(LRM30.px.data, c(CountyCodes), c("-"),
c("-"))
LRM30CountyTotsdata <- transform(LRM30CountyTotsdata, LeaveRate.rank = ave(LeaveRate,
Month, FUN = function(x) rank(-x, ties.method = "first")))
LRM30CountyTotsdata <- transform(LRM30CountyTotsdata, JoinRate.rank = ave(JoinRate,
Month, FUN = function(x) rank(-x, ties.method = "first")))
LRM30CountyTotsdata <- transform(LRM30CountyTotsdata, StayRate.rank = ave(StayRate,
Month, FUN = function(x) rank(-x, ties.method = "first")))
LRM30CountyTotsdata <- transform(LRM30CountyTotsdata, P35EmployRate.rank = ave(P35EmployRate,
Month, FUN = function(x) rank(-x, ties.method = "first")))
LRM30CountyTotsdata <- transform(LRM30CountyTotsdata, NetLeaveRate.rank = ave(LeaveRate -
JoinRate, Month, FUN = function(x) rank(-x, ties.method = "first")))
# head(LRM30CountyTotsdata )
Our first plot will look at plotting each county on an x y axes representing join rate and leave rate with a circle representing the stock of the Live Register. This is done for the latest Month.
# To get the last Month the following code was used to validate in
# LatestCountyTots LastMonth <-
# levels(LRM30CountyTotsdata$Month)[length(Months)] LastMonth
CountyTotsLatest <- subset(as.data.frame(LRM30CountyTotsdata), Month == levels(LRM30CountyTotsdata$Month)[length(levels(LRM30CountyTotsdata$Month))])
# CountyTotsLatest str(CountyTotsLatest)
cplatest <- ggplot(CountyTotsLatest, aes(x = LeaveRate, Y = JoinRate)) + ylim(0.03,
max(CountyTotsLatest$JoinRate, CountyTotsLatest$LeaveRate, na.rm = TRUE)) +
xlim(0.03, max(CountyTotsLatest$LeaveRate, CountyTotsLatest$JoinRate, na.rm = TRUE)) +
geom_abline(slope = 1) + geom_point(y = CountyTotsLatest$JoinRate, x = CountyTotsLatest$LeaveRate,
size = 4 * CountyTotsLatest$LRM30C1/mean(CountyTotsLatest$LRM30C1), aes(colour = CountyTotsLatest$Social.Welfare.Office),
shape = 19) + ylab("Join Rate (Joiners/Live Register)") + xlab("Leave Rate (Leavers/Live Register)") +
ggtitle(paste("Live register, by Leave Rate, Join Rate and County Month ",
levels(LRM30CountyTotsdata$Month)[length(levels(LRM30CountyTotsdata$Month))],
sep = "")) + guides(colour = guide_legend(ncol = 2))
cplatest
Cycle through each month to see how the Live Register dynamic changes from month to month
for (J in 2:length(levels(LRM30CountyTotsdata$Month))) {
# for (J in 2:4){
MonthJ <- levels(LRM30CountyTotsdata$Month)[J]
print(MonthJ)
CountiesMonthdata <- subset(as.data.frame(LRM30CountyTotsdata), Month ==
c(MonthJ))
cpp <- ggplot(CountiesMonthdata, aes(x = LeaveRate, Y = JoinRate)) + ylim(0.03,
0.12) + xlim(0.03, 0.12) + geom_abline(slope = 1) + geom_point(y = CountiesMonthdata$JoinRate,
x = CountiesMonthdata$LeaveRate, size = 8 * CountiesMonthdata$LRM30C1/mean(CountiesMonthdata$LRM30C1),
aes(colour = CountiesMonthdata$Social.Welfare.Office), shape = 19) +
ylab("Join Rate (Joiners/Live Register)") + xlab("Leave Rate (Leavers/Live Register)") +
ggtitle(paste(MonthJ, "Live register, by Leave Rate, Join Rate and County",
seps = " ")) + guides(colour = guide_legend(ncol = 2)) + theme(text = element_text(size = 16),
axis.text.x = element_text(size = 13, colour = "blue"), axis.text.y = element_text(size = 13,
colour = "red"), axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
legend.title = element_text(size = 12), legend.text = element_text(size = 12))
print(cpp)
}
## [1] "2011M05"
## [1] "2011M06"
## [1] "2011M07"
## [1] "2011M08"
## [1] "2011M09"
## [1] "2011M10"
## [1] "2011M11"
## [1] "2011M12"
## [1] "2012M01"
## [1] "2012M02"
## [1] "2012M03"
## [1] "2012M04"
## [1] "2012M05"
## [1] "2012M06"
## [1] "2012M07"
## [1] "2012M08"
## [1] "2012M09"
## [1] "2012M10"
## [1] "2012M11"
## [1] "2012M12"
## [1] "2013M01"
## [1] "2013M02"
## [1] "2013M03"
## [1] "2013M04"
## [1] "2013M05"
## [1] "2013M06"
## [1] "2013M07"
## [1] "2013M08"
## [1] "2013M09"
## [1] "2013M10"