數(shù)據(jù)來源:Airplane Crashes Since 1908 (kaggle.com)
代碼參考:Exploring historic Air Plane crash data | Kaggle
指標(biāo)名 |
含義 |
Date |
事故發(fā)生日期(年-月-日) |
Time |
當(dāng)?shù)貢r間,24小時制,格式為hh:mm |
Location |
事故發(fā)生的地點 |
Operator |
航空公司或飛機的運營商 |
Flight |
由飛機操作員指定的航班號 |
Route |
事故前飛行的全部或部分航線 |
Type |
飛機類型 |
Registration |
國際民航組織對飛機的登記 |
cn/In |
結(jié)構(gòu)號或序列號/線號或機身號 |
Aboard |
機上人數(shù) |
Fatalities |
死亡人數(shù) |
Ground |
地面死亡人數(shù) |
Summary |
事故的簡要描述和原 |
library(tidyverse)
library(lubridate)
library(plotly)
library(gridExtra)
library(usmap)
library(igraph)
library(tidytext)
library(tm)
library(SnowballC)
library(wordcloud)
library(RColorBrewer)
library(readxl)
df<- read.csv('F:\\Airplane_Crashes_and_Fatalities_Since_1908.csv',stringsAsFactors = FALSE)
df <- as_tibble(df)
head(df)
dim(df)
colnames(df)
df[is.na(df)] <- 0
df$Date <- mdy(df$Date)
df$Time <- hm(df$Time)
df$Year <- year(df$Date)
df$Month <- as.factor(month(df$Date))
df$Day <- as.factor(day(df$Date))
df$Weekday <- as.factor(wday(df$Date))
df$Week_no <- as.factor(week(df$Date))
df$Quarter <- as.factor(quarter(df$Date))
df$Is_Leap_Year <- leap_year(df$Date)
df$Decade <- year(floor_date(df$Date, years(10)))
df$Hour <- as.integer(hour(df$Time))
df$Minute <- as.factor(minute(df$Time))
df$AM_PM <- if_else(am(df$Time), 'AM', 'PM')
df$btwn_6PM_6AM <- if_else(df$Hour <= 6 | df$Hour >= 18, '6PM-6AM', '6AM-6PM')
year_wise <- df %>% count(Year)
day_wise <- df %>% count(Day)
week_day_wise <- df %>% count(Weekday)
month_wise <- df %>% count(Month)
week_no_wise <- df %>% count(Week_no)
q_wise <- df %>% count(Quarter)
hour_wise <- df %>% count(Hour)
am_pm_wise <- df %>% count(AM_PM)
btwn_6PM_6AM_wise <- df %>% count(btwn_6PM_6AM)
Fatalities_wise <- df %>% count(Fatalities)
#圖1:自1980年來每年失事飛機失事次數(shù)柱狀圖
ggplot(year_wise, aes(x = Year, y = n)) +
geom_col(fill = '#0f4c75', col = 'white') +
labs(title = '自1908年以來每年發(fā)生的飛機失事次數(shù)', x = '', y = '') +
scale_x_continuous(breaks = seq(1908, 2020, 4))
#圖2:失事飛機失事次數(shù)柱狀圖(按一周第幾天、一月第幾天統(tǒng)計)
wd <- ggplot(week_day_wise, aes(x = Weekday, y = n)) +
geom_col(fill = '#3b6978', col = 'white')+
labs(title = '按周的每一天統(tǒng)計飛機失事次', x = '', y = '')
d <- ggplot(day_wise, aes(x = Day, y = n)) +
geom_col(fill = '#b83b5e', col = 'white')+
labs(title = '按月的每一天統(tǒng)計飛機失事次', x = '', y = '')
grid.arrange(wd, d, nrow = 1, widths = c(1, 3))
#圖3:失事飛機失事次數(shù)柱狀圖(按一年第幾月、第幾周、第幾季度統(tǒng)計)
m <- ggplot(month_wise, aes(x = Month, y = n)) +
geom_col(fill = '#ffcb74', col = 'white') +
labs(title = '按月統(tǒng)計', x = '', y = '')
wn <- ggplot(week_no_wise, aes(x = Week_no, y = n)) +
geom_col(fill = '#4f8a8b', col = 'white') +
labs(title = '按周統(tǒng)計', x = '', y = '')
q <- ggplot(q_wise, aes(x = Quarter, y = n)) +
geom_col(fill = '#ea907a', col = 'white') +
labs(title = '按季度統(tǒng)計', x = '', y = '')
grid.arrange(m, wn, q, nrow = 1, widths = c(2, 5, 1))
#圖4:失事飛機失事次數(shù)柱狀圖(按一天第幾小時、一天中上下午度統(tǒng)計)
h <- ggplot(hour_wise, aes(x = Hour, y = n)) +
geom_col(fill = '#BD956A') +
labs(title = '按小時統(tǒng)計', x = '', y = '')
a <- ggplot(am_pm_wise, aes(x = AM_PM, y = n, fill = AM_PM)) +
geom_col() +
labs(title = '上午-下午', x = '', y = '') +
scale_fill_brewer(palette = "Set1") +
theme(legend.position = "none")
n <- ggplot(btwn_6PM_6AM_wise, aes(x = btwn_6PM_6AM, y = n, fill = btwn_6PM_6AM)) +
geom_col() +
labs(title = '白天&夜間', x = '', y = '') +
scale_fill_brewer(palette = "Dark2") +
theme(legend.position = "none")
grid.arrange(h, a, n, nrow = 1, layout_matrix = rbind(c(1,1,1,1,2),c(1,1,1,1,3)))
#圖5:失事飛機型號統(tǒng)計條形圖
# 按類型分組
type_wise <- df %>%
count(Type, sort = TRUE)
#按制造商提取和分組
main_type_wise <- df %>%
#用空字符串替換型號
mutate(main_type = str_replace_all(Type, "[A-Za-z]*-?\\d+-?[A-Za-z]*.*", "")) %>%
count(main_type, sort = TRUE) %>%
# 跳過空字符串行
filter(main_type > 'A')
options(repr.plot.width = 12)
# 失事飛機的型號排名(前20)
ggplot(head(type_wise, 20), aes(reorder(Type, n) , n, fill = n)) +
geom_col(fill = 'deepskyblue2') +
geom_text(aes(label = n), hjust = 1.5, colour = "white", size = 5, fontface = "bold") +
labs(title = '失事飛機的型號統(tǒng)計', x = '', y = '') +
coord_flip()
#圖6:失事飛機制造商統(tǒng)計條形圖
ggplot(head(main_type_wise, 10), aes(reorder(main_type, n), n, fill = n)) +
geom_col(fill = 'deepskyblue2') +
geom_text(aes(label = n), hjust = 1.5, colour = "white", size = 5, fontface = "bold") +
labs(title = '失事飛機的制造商統(tǒng)計', x = '', y = '')+
coord_flip()
#圖7:失事飛機(包括軍事飛機)運營商統(tǒng)計條形圖
#運營商統(tǒng)計
operator_wise <- df %>%
count(Operator, sort = TRUE)
#商業(yè)運營商表
main_op_wise <- df %>%
# replace all group of words followed by '-'
mutate(main_op = str_replace_all(Operator, ' -.*', '')) %>%
filter(!str_detect(main_op, '[Mm]ilitary')) %>%
filter(!str_detect(main_op, 'Private')) %>%
count(main_op, sort = TRUE) %>%
filter(main_op > 'A')
# 提取軍事飛行數(shù)據(jù)
force <- operator_wise %>%
filter(str_detect(Operator, '[Mm]ilitary')) %>%
mutate(op = str_replace_all(Operator, 'Military ?-? ?', '')) %>%
count(op, sort = TRUE)
#提取軍事飛機所屬國家
force_country <- operator_wise %>%
# 獲取包含字符串“軍用”的行'military'
filter(str_detect(Operator, 'Military|military')) %>%
# 將帶有包含國家信息的字符串替換為國家名
mutate(op = str_replace_all(Operator, 'Royal Air Force', 'UK')) %>%
mutate(op = str_replace_all(op, 'Military ?-? ?|Royal', '')) %>%
mutate(op = str_replace_all(op, ' (Navy|Army|Air|Maritime Self Defense|Marine Corps|Naval|Defence|Armed) ?.*', '')) %>%
mutate(op = str_replace_all(op, '.*U\\.? ?S\\.?.*|United States|American', 'USA')) %>%
mutate(op = str_replace_all(op, 'Aeroflot ?/? ?', '')) %>%
mutate(op = str_replace_all(op, '.*Republic? ?of', '')) %>%
mutate(op = str_replace_all(op, '.*British.*', 'UK')) %>%
mutate(op = str_replace_all(op, '.*Indian.*', 'Indian')) %>%
mutate(op = str_replace_all(op, '.*Chin.*', 'Chinese')) %>%
mutate(op = str_replace_all(op, '.*Chilean.*', 'Chilian')) %>%
mutate(op = str_replace_all(op, '.*Iran.*', 'Iran')) %>%
mutate(op = str_replace_all(op, '.*French.*', 'French')) %>%
mutate(op = str_replace_all(op, '.*Ecuador.*', 'Ecuadorean')) %>%
mutate(op = str_replace_all(op, '.*Zambia.*', 'Zambian')) %>%
mutate(op = str_replace_all(op, '.*Russia.*', 'Russian')) %>%
mutate(op = str_replace_all(op, '.*Afghan.*', 'Afghan')) %>%
group_by(op) %>%
summarize(n = sum(n)) %>%
arrange(desc(n))
#軍用飛行與非軍用飛行
yr_military <- df %>%
select(Year, Operator) %>%
mutate(Is_Military = str_detect(Operator, 'Military|military')) %>%
group_by(Year, Is_Military) %>%
summarize(n = n())
ggplot(head(operator_wise, 10), aes(reorder(Operator, n) , n, fill = n))+
geom_col(fill = 'coral3')+
labs(title='失事飛機(包括軍事飛機在內(nèi))的運營商統(tǒng)計', x = '', y = '')+
geom_text(aes(label = n), hjust = 1.5, colour = "white", size = 5, fontface = "bold")+
coord_flip()
#圖8:失事飛機(不包括軍事飛機)運營商統(tǒng)計條形圖
ggplot(head(main_op_wise, 10), aes(reorder(main_op, n) , n, fill=n)) +
geom_col(fill='coral2') +
labs(title='失事商業(yè)飛機(不包括軍事飛機)的商業(yè)運營商統(tǒng)計', x='', y='') +
geom_text(aes(label = n), hjust = 1.5, colour = "white", size = 5, fontface = "bold") +
coord_flip()
#圖9:軍事飛機所屬軍隊、所屬國家統(tǒng)計條形圖
f <- ggplot(head(force, 10), aes(reorder(op, n) , n, fill = n))+
geom_col(fill = 'cyan4')+
labs(title = '軍事飛機失事統(tǒng)計', x = '', y = '')+
geom_text(aes(label = n), hjust = 1.5, colour = "white", size = 5, fontface = "bold")+
coord_flip()
fc <- ggplot(head(force_country, 10), aes(reorder(op, n) , n, fill = n))+
geom_col(fill = 'cyan3')+
labs(title = '軍事飛機失事的國家排名', x = '', y = '')+
geom_text(aes(label = n), hjust = 1.5, colour = "white", size = 5, fontface = "bold")+
coord_flip()
grid.arrange(f,fc, nrow = 1, widths = c(1, 1))
#圖10:自1980年來軍事飛機與非軍事失事次數(shù)柱狀圖
ggplot(yr_military, aes(x = Year, y = n, fill = Is_Military)) +
geom_col(col = 'white') +
labs(title = '失事飛機是否為軍用飛機?',x = '', y = '', fill = '') +
scale_x_continuous(breaks = seq(1908, 2020, 4)) +
scale_fill_brewer(palette = "Dark2") +
theme(legend.position = "top", legend.justification = "left")
#圖11:飛機失事地點統(tǒng)計條形圖
take_off_dest <- df %>%
select('Route') %>%
filter(Route!='') %>%
filter(str_detect(Route, ' ?- ?')) %>%
mutate(Take_Off = str_extract(Route, '[^-]* ?-?')) %>%
mutate(Take_Off = str_replace(Take_Off, ' -', ''))%>%
mutate(Destination = str_extract(Route, '- ?[^-]*$')) %>%
mutate(Destination = str_replace(Destination, '- ?', ''))
route <- take_off_dest %>% count(Route, sort = TRUE)
take_off <- take_off_dest %>% count(Take_Off, sort = TRUE)
dest <- take_off_dest %>% count(Destination, sort = TRUE)
r <- ggplot(head(route, 15), aes(reorder(Route, n) , n, fill=n))+
geom_col(fill='#E59CC4')+
labs(title='飛行途中失事路線', x='', y='')+
geom_text(aes(label=n), hjust = 1.5, colour="white", size=5, fontface="bold")+
coord_flip()
t <- ggplot(head(take_off, 15), aes(reorder(Take_Off, n) , n, fill=n))+
geom_col(fill='#005082')+
labs(title='起飛時飛機失事地點', x='', y='')+
geom_text(aes(label=n), hjust = 1.5, colour="white", size=5, fontface="bold")+
coord_flip()
d <- ggplot(head(dest, 15), aes(reorder(Destination, n) , n, fill=n))+
geom_col(fill='#ff6363')+
labs(title='落地時飛機失事地點', x='', y='')+
geom_text(aes(label=n), hjust = 1.5, colour="white", size=5, fontface="bold")+
coord_flip()
options(repr.plot.width = 18)
grid.arrange(r,t,d, nrow = 1, widths=c(1,1,1))
#圖12:全球范圍內(nèi)飛機失事熱力圖
cntry <- cntry %>%
mutate(m = case_when(
n >= 100 ~ "100 +",
n < 100 & n >= 70 ~ "70 - 100",
n < 70 & n >= 40 ~ "40 - 70",
n < 40 & n >= 10 ~ "10 - 40",
n < 10 ~ "< 10")) %>%
mutate(m = factor(m, levels = c("< 10", "10 - 40", "40 - 70", "70 - 100", "100 +")))
world_map <- map_data("world")
map_data <- cntry %>%
full_join(world_map, by = c('Country' = 'region'))
options(repr.plot.width = 18, repr.plot.height = 9)
map_pal = c("#7FC7AF", "#E4B363",'#EF6461',"#E97F02",'#313638')
ggplot(map_data, aes(x = long, y = lat, group = group, fill = m)) +
geom_polygon(colour = "white") +
labs(title = '全球范圍內(nèi)飛機失事熱力圖', x = '', y = '', fill = '') +
scale_fill_manual(values = map_pal, na.value = 'whitesmoke') +
theme(legend.position='right', legend.justification = "top") +
guides(fill = guide_legend(reverse = TRUE))
#圖13:飛機失事原因詞云圖
data <- read_excel("F:\\summary.xlsx")
corpus <- Corpus(VectorSource(data))
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, removeWords, stopwords("english"))
dtm <- TermDocumentMatrix(corpus)
word_freqs <- rowSums(as.matrix(dtm))
wordcloud(names(word_freqs), word_freqs, min.freq = 1, max.words=150,words_distance=0.001,random.order=FALSE,font_path='msyh.ttc',
rot.per=0.05,colors=brewer.pal(8, "Dark2"), backgroundColor = "grey",shape = 'circle',width=3, height=9)
文章來源:http://www.zghlxwxcb.cn/news/detail-835444.html
ps:低價出課程論文-多元統(tǒng)計分析論文、R語言論文、stata計量經(jīng)濟學(xué)課程論文(論文+源代碼+數(shù)據(jù)集)文章來源地址http://www.zghlxwxcb.cn/news/detail-835444.html
到了這里,關(guān)于R語言課程論文-飛機失事數(shù)據(jù)可視化分析的文章就介紹完了。如果您還想了解更多內(nèi)容,請在右上角搜索TOY模板網(wǎng)以前的文章或繼續(xù)瀏覽下面的相關(guān)文章,希望大家以后多多支持TOY模板網(wǎng)!