# This code is a modified version of Drew Conway's code found at
# https://github.com/drewconway/ZIA/tree/master/R/Chicago
# Load libraries
library(ggplot2)
library(maptools)
# Load the raw incident export, keeping text columns as character vectors.
crimes <- read.csv("Crimes.csv", stringsAsFactors = FALSE)

# Collapse the incidents to one row per occurrence date, crime type and
# coordinate pair; COUNT is the number of rows that fell into each group.
# "PRIMARY.DECSRIPTION" is misspelled in the data's own header, so the same
# misspelling has to be used here.
# NOTE(review): ddply() and .() are not defined in this file and plyr is not
# loaded explicitly -- presumably it is attached through ggplot2; confirm.
crimes.by.day <- ddply(
  crimes,
  .(DATE..OF.OCCURRENCE, PRIMARY.DECSRIPTION, X.COORDINATE, Y.COORDINATE),
  summarise,
  COUNT = length(ARREST)
)

# Parse the month/day/year text into Date values, then sort chronologically.
crimes.by.day$DATE <- as.Date(crimes.by.day[["DATE..OF.OCCURRENCE"]],
                              format = "%m/%d/%Y")
crimes.by.day <- crimes.by.day[order(crimes.by.day$DATE), ]

# Cache the cleaned table on disk.
write.csv(crimes.by.day, file = "crimes_by_day.csv", row.names = FALSE)
# Reload the cached per-day table; DATE is converted back into Date values
# after the CSV round trip.
crimes.by.day <- read.csv("crimes_by_day.csv", stringsAsFactors = FALSE)
crimes.by.day$DATE <- as.Date(crimes.by.day[["DATE"]])

# cumul.crimes holds one row per (primary crime type, date). DAYCOUNT is the
# sum of COUNT over all coordinates for that type on that date, i.e. a daily
# total rather than a running one.
cumul.crimes <- ddply(
  crimes.by.day,
  .(PRIMARY.DECSRIPTION, DATE),
  summarise,
  DAYCOUNT = sum(COUNT),
  .progress = "text"
)

# Put the rows in date order.
cumul.crimes <- cumul.crimes[order(cumul.crimes$DATE), ]
# Create broader categories of crimes
# Violent crimes only
vio_crimes <- c("BATTERY", "ASSAULT", "CRIM SEXUAL ASSAULT", "HOMICIDE", "INTIMIDATION")
# Property crimes only
prop_crimes <- c("THEFT", "CRIMINAL DAMAGE", "BURGLARY", "MOTOR VEHICLE THEFT", "ROBBERY", "CRIMINAL TRESPASS")
# "Vice" crimes only
vice_crimes <- c("NARCOTICS", "PROSTITUTION", "GAMBLING", "LIQUOR LAW VIOLATION")

# Tag every row of cumul.crimes with its broad category in MAJORTYPE.
# %in% yields a plain TRUE/FALSE mask (never NA), so membership is tested
# directly instead of coercing a match() index to logical.
# The labels are written from lowest to highest priority (VICE, PROPERTY,
# VIOLENT) so that a description listed in more than one vector would end up
# with the same label as before: VIOLENT first, then PROPERTY, then VICE.
cumul.crimes$MAJORTYPE <- NA_character_
cumul.crimes$MAJORTYPE[cumul.crimes$PRIMARY.DECSRIPTION %in% vice_crimes] <- "VICE"
cumul.crimes$MAJORTYPE[cumul.crimes$PRIMARY.DECSRIPTION %in% prop_crimes] <- "PROPERTY"
cumul.crimes$MAJORTYPE[cumul.crimes$PRIMARY.DECSRIPTION %in% vio_crimes] <- "VIOLENT"

# Drop every crime type that belongs to none of the three categories.
cumul.crimes <- subset(cumul.crimes, !is.na(MAJORTYPE))
# total.crimes feeds the line plot: one row per (date, broad category).
# Despite its name, CUMDAYCOUNT is the sum of DAYCOUNT for that category on
# that single date.
total.crimes <- ddply(
  cumul.crimes,
  .(DATE, MAJORTYPE),
  summarise,
  CUMDAYCOUNT = sum(DAYCOUNT),
  .progress = "text"
)
total.crimes <- total.crimes[order(total.crimes$DATE), ]

# CUMYEARCOUNT is the running total of CUMDAYCOUNT within each category,
# accumulated in the date order established just above.
total.crimes$CUMYEARCOUNT <- NA
for (crime.type in c("PROPERTY", "VIOLENT", "VICE")) {
  type.rows <- which(total.crimes$MAJORTYPE == crime.type)
  total.crimes$CUMYEARCOUNT[type.rows] <- cumsum(total.crimes$CUMDAYCOUNT[type.rows])
}
# Remove the loop helpers so no extra objects are left in the workspace.
rm(crime.type, type.rows)

total.crimes$MAJORTYPE <- as.factor(total.crimes$MAJORTYPE)
# Read the shapefile of the Chicago Police Department's (CPD) beats.
# "cpd_beats.shp" is a relative path, so it is looked up in the current
# working directory.
cpd.shp <- readShapePoly("cpd_beats.shp")
## fortify.SpatialPolygons() will convert a Spatial Polygon into a regular dataframe of coordinates.
## The map plots further down read the long, lat and group columns of cpd.df.
cpd.df <- fortify.SpatialPolygons(cpd.shp)
# major.crimes.by.day is used for the geospatial plot: the per-day,
# per-location rows of crimes.by.day that fall into one of the three broad
# categories, with the category stored in MAJORTYPE.
# %in% yields a plain TRUE/FALSE mask (never NA), so membership is tested
# directly instead of coercing a match() index to logical.
# The labels are written from lowest to highest priority (VICE, PROPERTY,
# VIOLENT) so that a description listed in more than one vector would end up
# with the same label as before: VIOLENT first, then PROPERTY, then VICE.
crimes.by.day$MAJORTYPE <- NA_character_
crimes.by.day$MAJORTYPE[crimes.by.day$PRIMARY.DECSRIPTION %in% vice_crimes] <- "VICE"
crimes.by.day$MAJORTYPE[crimes.by.day$PRIMARY.DECSRIPTION %in% prop_crimes] <- "PROPERTY"
crimes.by.day$MAJORTYPE[crimes.by.day$PRIMARY.DECSRIPTION %in% vio_crimes] <- "VIOLENT"

# Keep only the rows that received a category.
major.crimes.by.day <- subset(crimes.by.day, !is.na(MAJORTYPE))
# Render one map frame and one timeline frame for each day in date.range.
# Frame `day` uses every row dated on or before date.range[day], so the map
# accumulates points from frame to frame. Files are written to
# maps/<day>.png and timelines/<day>_time.png.
# NOTE(review): opts(), theme_text(), theme_blank() and the legend=, to=,
# format= and major= scale arguments look like an older ggplot2 interface --
# confirm the installed version still provides them.
date.range <- seq.Date(as.Date(min(major.crimes.by.day$DATE)),
                       as.Date(max(major.crimes.by.day$DATE)), "days")
mycolors <- c("PROPERTY" = "darkolivegreen3", "VICE" = "dodgerblue", "VIOLENT" = "firebrick1")

# Make sure the output folders exist so the frame files have somewhere to go.
dir.create("maps", showWarnings = FALSE)
dir.create("timelines", showWarnings = FALSE)

# seq_along() keeps the loop from running when date.range is empty, and the
# loop variable is no longer modified inside the body: for() resets it on
# every pass, so the old manual increment only skewed the progress output.
for (day in seq_along(date.range)) {
  # Timeline: per-day totals for each category up to the current day, drawn
  # on fixed axes so consecutive frames line up.
  line.plot <- ggplot(subset(total.crimes, DATE <= date.range[day]),
                      aes(x = DATE, y = CUMDAYCOUNT, group = MAJORTYPE)) +
    geom_line(aes(colour = MAJORTYPE), lwd = 1) +
    theme_bw() +
    scale_colour_manual(name = "Type of Crime", values = mycolors,
                        breaks = c(1, 3, 2), labels = c("Property", "Violent", "Vice"),
                        legend = FALSE) +
    xlab("") + ylab("Crimes Per Day") +
    opts(axis.title.y = theme_text(colour = "black", size = 10, hjust = 1, angle = 90)) +
    scale_y_continuous(limits = c(0, 700)) +
    scale_x_date(limits = c(as.Date("2010-06-01", format = "%Y-%m-%d"),
                            as.Date("2011-07-01", format = "%Y-%m-%d")),
                 format = "%b\n%Y", major = "1 month")

  # Base map: beat outlines, with a title covering the first date through
  # the current one.
  day.plot <- ggplot(cpd.df, aes(x = long, y = lat)) + geom_path(aes(group = group))
  day.plot <- day.plot +
    theme_bw() +
    opts(title = paste("Cumulative Crime in Chicago\n",
                       strftime(min(major.crimes.by.day$DATE), format = "%b %d, %Y"), "to",
                       strftime(date.range[day], format = "%b %d, %Y")),
         panel.grid.major = theme_blank(),
         panel.grid.minor = theme_blank(),
         axis.text.x = theme_blank(),
         axis.text.y = theme_blank(),
         axis.ticks = theme_blank())

  # NOTE(review): grob is only assigned inside this branch but is drawn again
  # below regardless; if cpd.df ever had zero rows the png step would fail or
  # reuse a stale grob. The condition does not change between iterations.
  if (nrow(cpd.df) > 0) {
    # Overlay every categorised crime up to the current day, coloured by
    # category.
    # NOTE(review): size and alpha are given as constants inside aes() and
    # then rescaled by scale_size()/scale_alpha() -- presumably deliberate
    # for this ggplot2 version; confirm before changing.
    day.plot <- day.plot +
      geom_point(data = subset(major.crimes.by.day, DATE <= date.range[day]),
                 aes(x = X.COORDINATE, y = Y.COORDINATE,
                     color = MAJORTYPE, size = 5, alpha = .01)) +
      theme_bw() +
      scale_size(legend = FALSE, to = c(.7)) +
      scale_alpha(legend = FALSE, to = c(.01, .8)) +
      scale_colour_manual(name = "Type of Crime", values = mycolors,
                          breaks = c("PROPERTY", "VIOLENT", "VICE"),
                          labels = c("Property", "Violent", "Vice")) +
      opts(panel.grid.major = theme_blank()) +
      opts(panel.grid.minor = theme_blank()) +
      opts(axis.text.x = theme_blank()) +
      opts(axis.text.y = theme_blank()) +
      opts(axis.ticks = theme_blank()) +
      opts(legend.position = c(.87, .78)) +
      opts(legend.text = theme_text(size = 14)) +
      opts(legend.title = theme_text(size = 16, face = "bold", hjust = 0)) +
      opts(legend.key.size = unit(2, "lines")) +
      xlab("") + ylab("")

    # Edit the grobs whose names match "key.points" to an 8 mm size, then
    # draw the result on the current device.
    grob <- ggplotGrob(day.plot)
    grob <- geditGrob(grob, size = unit(8, "mm"), "key.points", grep = TRUE)
    grid.newpage()
    grid.draw(grob)
  }

  # Save the map frame.
  png(filename = paste("maps/", day, ".png", sep = ""), units = "in",
      res = 300, height = 9, width = 10)
  grid.draw(grob)
  dev.off()

  # Save the matching timeline frame.
  ggsave(plot = line.plot, filename = paste("timelines/", day, "_time.png", sep = ""),
         height = 2, width = 10)

  # Progress: index of the frame that was just written.
  print(day)
}
## standalone plots
## Daily crime plot: per-day totals for each category, restricted to dates on
## or before date.range[358].
## NOTE(review): 358 is a hard-coded index into date.range -- confirm it still
## points at the intended last day when the input data changes.
line.plot <- ggplot(subset(total.crimes, DATE <= date.range[358]),
                    aes(x = DATE, y = CUMDAYCOUNT, group = MAJORTYPE))

## One coloured line per category on a black-and-white theme.
line.plot <- line.plot +
  geom_line(aes(colour = MAJORTYPE), lwd = 1) +
  theme_bw()

## Category colours, with the legend switched off for this plot.
line.plot <- line.plot +
  scale_colour_manual(name = "Type of Crime", values = mycolors,
                      breaks = c(1, 3, 2), labels = c("Property", "Violent", "Vice"),
                      legend = FALSE)

## Axis titles and the styling of the y-axis title.
line.plot <- line.plot +
  xlab("") +
  ylab("Crimes Per Day") +
  opts(axis.title.y = theme_text(colour = "black", size = 10, hjust = .5, angle = 90))

## Fixed axis ranges: 0-700 on y, June 2010 through July 2011 on x with
## monthly breaks.
line.plot <- line.plot +
  scale_y_continuous(limits = c(0, 700)) +
  scale_x_date(limits = c(as.Date("2010-06-01", format = "%Y-%m-%d"),
                          as.Date("2011-07-01", format = "%Y-%m-%d")),
               format = "%b\n%Y", major = "1 month")
## Map of crimes plot: beat outlines with every categorised crime in
## major.crimes.by.day drawn on top, coloured by category.
day.plot <- ggplot(cpd.df, aes(x = long, y = lat)) + geom_path(aes(group = group))

## Title and a bare panel (no grid lines, axis text or ticks). The title is a
## fixed string, so it is passed directly rather than through paste().
day.plot <- day.plot +
  theme_bw() +
  opts(title = "Crime in Chicago\n June 20, 2010 to June 13, 2011",
       panel.grid.major = theme_blank(),
       panel.grid.minor = theme_blank(),
       axis.text.x = theme_blank(),
       axis.text.y = theme_blank(),
       axis.ticks = theme_blank())

## Crime points plus legend styling.
## NOTE(review): size and alpha are given as constants inside aes() and then
## rescaled by scale_size()/scale_alpha() -- presumably deliberate for this
## ggplot2 version; confirm before changing.
day.plot <- day.plot +
  geom_point(data = major.crimes.by.day,
             aes(x = X.COORDINATE, y = Y.COORDINATE,
                 color = MAJORTYPE, size = 5, alpha = .01)) +
  theme_bw() +
  scale_size(legend = FALSE, to = c(.7)) +
  scale_alpha(legend = FALSE, to = c(.01, .8)) +
  scale_colour_manual(name = "Type of Crime", values = mycolors,
                      breaks = c("PROPERTY", "VIOLENT", "VICE"),
                      labels = c("Property", "Violent", "Vice")) +
  opts(panel.grid.major = theme_blank()) +
  opts(panel.grid.minor = theme_blank()) +
  opts(axis.text.x = theme_blank()) +
  opts(axis.text.y = theme_blank()) +
  opts(axis.ticks = theme_blank()) +
  opts(legend.position = c(.87, .78)) +
  opts(legend.text = theme_text(size = 14)) +
  opts(legend.title = theme_text(size = 16, face = "bold", hjust = 0)) +
  opts(legend.key.size = unit(2, "lines")) +
  xlab("") + ylab("")

## Edit the grobs whose names match "key.points" to an 8 mm size, then draw
## the result on a fresh page. TRUE is spelled out because T is an ordinary
## variable that can be reassigned.
grob <- ggplotGrob(day.plot)
grob <- geditGrob(grob, size = unit(8, "mm"), "key.points", grep = TRUE)
grid.newpage()
grid.draw(grob)