PlatinumEssays.com - Free Essays, Term Papers, Research Papers and Book Reports
Search

Segregating Long but Non-Risky Landing Distance from Risky Distance

By:   •  March 7, 2019  •  Coursework  •  6,879 Words (28 Pages)  •  765 Views

Page 1 of 28

Statistical Modelling Assignment 2 - FAA Project

Varsha

February 19, 2019

segregating long but non-risky landing distance from risky distance

Packages Required

library(tidyverse)  #to visualize, transform, input, tidy and join data
library(dplyr)      #data wrangling
library(stringr)    #string related functions
library(kableExtra) #to create HTML Table
library(DT)         #to preview the data sets
library(lubridate)  #to apply the date functions
library(xlsx)       #to load excel files
library(ROCR)       #to use ROC curves
library(faraway)    #to use the ilogit function

Long Landing

Load the datasets and perform the initial data cleaning (the detailed steps were covered in the first assignment).

# Load both FAA spreadsheets and stack them into a single data frame.
faa1 <- read.xlsx("FAA1.xls", sheetName = "FAA1")
faa2 <- read.xlsx("FAA2_2.xls", sheetName = "Sheet1")
faa <- bind_rows(faa1, faa2)

# Flag rows that repeat an earlier row on every column except `duration`.
is_duplicate <- faa %>%
  select(-duration) %>%
  duplicated()
check <- which(is_duplicate)

# Drop duplicates with a logical mask. `faa[-check, ]` would return zero rows
# when `check` is empty, because negative indexing with integer(0) selects
# nothing.
faa <- faa[!is_duplicate, ]

# Keep rows whose duration is missing or above 40 and whose speed_ground,
# height and distance fall inside the accepted ranges.
faa_check <- faa %>%
  filter(
    (duration > 40 | is.na(duration)) &
      (speed_ground >= 30) & (speed_ground <= 140) &
      (height >= 6) & (distance < 6000)
  )
faa <- faa_check

# Mean-impute missing durations into a new column; `duration` itself is
# left untouched.
faa$duration_corrected <- ifelse(
  is.na(faa$duration),
  mean(faa$duration, na.rm = TRUE),
  faa$duration
)

Step 1:

# Derive the two binary responses from `distance` (1 when above the
# threshold, 0 otherwise), make `aircraft` a factor, then drop the raw
# `duration` and `distance` columns.
faa <- faa %>%
  mutate(
    long.landing = factor(as.numeric(distance > 2500)),
    risky.landing = factor(as.numeric(distance > 3000)),
    aircraft = factor(aircraft)
  ) %>%
  select(-duration, -distance)

Step 2: Histogram to show distribution of “long.landing”

# Bar chart of the number of rows at each level of `long.landing`.
ggplot(data = faa, mapping = aes(x = long.landing)) +
  geom_bar()

[pic 1]

Only 12% of aircraft have a long landing.

# Share of rows at each level of `long.landing`, rounded to two decimals.
round(prop.table(table(faa[["long.landing"]])), digits = 2)

##
##    0    1
## 0.88 0.12

Step 3:

# One single-predictor logistic regression of `long.landing` per candidate
# variable. Columns are named in the formula and supplied through `data =`
# (rather than as `faa$col` vectors) so that coefficient names are clean and
# `predict(model, newdata = ...)` works on new data frames.
mdl_duration <- glm(long.landing ~ duration_corrected,
                    data = faa, family = "binomial")
mdl_speedgrnd <- glm(long.landing ~ speed_ground,
                     data = faa, family = "binomial")
mdl_height <- glm(long.landing ~ height,
                  data = faa, family = "binomial")
mdl_pitch <- glm(long.landing ~ pitch,
                 data = faa, family = "binomial")
mdl_nopasg <- glm(long.landing ~ no_pasg,
                  data = faa, family = "binomial")
mdl_speedair <- glm(long.landing ~ speed_air,
                    data = faa, family = "binomial")
mdl_aircraft <- glm(long.landing ~ aircraft,
                    data = faa, family = "binomial")


# Pull the estimate (column 1) and p-value (column 4) for one row of a fitted
# model's coefficient table. `coef(summary(.))` is used instead of
# `summary(.)$coef`, which only works through partial matching of `$`.
extract_term <- function(model, row) {
  coef(summary(model))[row, c(1, 4)]
}

# Row 2 is the slope of the single predictor in each model.
duration <- extract_term(mdl_duration, 2)
speed_ground <- extract_term(mdl_speedgrnd, 2)
height <- extract_term(mdl_height, 2)
pitch <- extract_term(mdl_pitch, 2)
no_pasg <- extract_term(mdl_nopasg, 2)
speed_air <- extract_term(mdl_speedair, 2)
aircraft_boeing <- extract_term(mdl_aircraft, 2)
# NOTE(review): row 1 is the model intercept, not a separate aircraft effect.
# Its exponential is a baseline odds rather than an odds ratio - presumably
# the Airbus level is the reference; confirm before interpreting this row.
aircraft_airbus <- extract_term(mdl_aircraft, 1)

# Keep the unrounded estimates so derived quantities are not computed from
# values that have already lost precision.
raw_estimates <- c(duration[1], speed_ground[1], height[1], pitch[1],
                   no_pasg[1], speed_air[1], aircraft_boeing[1],
                   aircraft_airbus[1])
coefficients <- round(raw_estimates, digits = 3)

# Exponentiate the raw estimates, then round once for display.
odds_ratio <- round(exp(raw_estimates), 3)

p_value <- c(duration[2], speed_ground[2], height[2], pitch[2],
             no_pasg[2], speed_air[2], aircraft_boeing[2],
             aircraft_airbus[2])
p_value <- round(p_value, digits = 3)

variable_names <- c("Duration", "Ground Speed", "Height", "Pitch",
                    "No. of Passengers", "Air Speed", "Aircraft-Boeing",
                    "Aircraft-Airbus")

table_2 <- data.frame(variable_names, coefficients, odds_ratio, p_value)
# Take the sign from the raw estimates: a small positive slope that rounds to
# 0 would otherwise be reported as "Negative".
table_2$slope_direction <- ifelse(unname(raw_estimates) > 0,
                                  "Positive", "Negative")
table_2 <- table_2 %>%
  select(variable_names, coefficients, odds_ratio, p_value,
         slope_direction) %>%
  arrange(p_value)

table_2

...

Download:  txt (28.2 Kb)   pdf (364.3 Kb)   docx (74.7 Kb)  
Continue for 27 more pages »