# Extended R Practical: Answers

###########################################
# Rebecca Wilson Newcastle University
#  Intro to R course, extended practical
# data: sim alspac
###########################################

# Point R at the folder that holds the course data
# (the X's look like a placeholder path - substitute your own folder)
setwd(dir = "XXXXXXXXXXXXXXXX/XXXXXXXXXXX")

# Load the simulated ALSPAC data from the working directory into a data frame
sim.alspac <- read.csv(file = "alspac.csv")


# List the variables (column names) held in the data frame
names(sim.alspac)


# [1] "male"       "age.yrs.7"  "ht.7"       "ht.sit.7"   "ws.7"
# [6] "hp.7"       "wt.7"       "sbp.7"      "dbp.7"      "pulse.7"
#[11] "BMI.7"      "age.yrs.11" "ht.11"      "ht.sit.11"  "ws.11"
#[16] "hp.11"      "wt.11"      "sbp.11"     "dbp.11"     "pulse.11"
#[21] "BMI.11"


# Rows x columns: one row per participant, one column per variable
dim(sim.alspac)


#[1] 1000   21
#1000 participants
#21 variables
#selecting and subsetting data


# Split the participants on the male indicator (1 = male, 0 = female).
# which() skips rows where the comparison is NA, matching what subset() does.
subset.male <- sim.alspac[which(sim.alspac$male == 1), , drop = FALSE]
subset.female <- sim.alspac[which(sim.alspac$male == 0), , drop = FALSE]


# The first element of dim() is the number of participants in each group
dim(subset.male)
#[1] 478  21

dim(subset.female)
#[1] 522  21

#######################################################
# Exploring the data
#######################################################
#summaries by gender
# summary() on a data frame reports, for each column, the minimum, quartiles,
# median, mean and maximum (see the recorded output below).

# Male participants
summary(subset.male)

#> summary(subset.male)
#      male     age.yrs.7          ht.7          ht.sit.7          ws.7      
# Min.   :1   Min.   :6.617   Min.   :110.7   Min.   :59.65   Min.   :43.99  
# 1st Qu.:1   1st Qu.:7.352   1st Qu.:121.7   1st Qu.:65.90   1st Qu.:52.15  
# Median :1   Median :7.556   Median :125.3   Median :67.79   Median :55.90  
# Mean   :1   Mean   :7.550   Mean   :125.5   Mean   :67.78   Mean   :55.99  
# 3rd Qu.:1   3rd Qu.:7.749   3rd Qu.:129.7   3rd Qu.:69.61   3rd Qu.:59.35  
# Max.   :1   Max.   :8.683   Max.   :144.2   Max.   :75.93   Max.   :73.30  
#      hp.7            wt.7           sbp.7            dbp.7      
# Min.   :51.62   Min.   :14.88   Min.   : 62.55   Min.   :33.48  
# 1st Qu.:61.59   1st Qu.:22.04   1st Qu.: 93.85   1st Qu.:52.51  
# Median :64.76   Median :25.36   Median : 99.70   Median :57.19  
# Mean   :65.09   Mean   :25.47   Mean   : 99.96   Mean   :57.13  
# 3rd Qu.:68.58   3rd Qu.:28.69   3rd Qu.:106.79   3rd Qu.:62.24  
# Max.   :83.31   Max.   :42.49   Max.   :127.28   Max.   :78.84  
#    pulse.7           BMI.7         age.yrs.11        ht.11      
# Min.   : 47.89   Min.   :10.46   Min.   :11.07   Min.   :128.0  
# 1st Qu.: 75.63   1st Qu.:14.66   1st Qu.:11.63   1st Qu.:145.7  
# Median : 82.12   Median :16.09   Median :11.77   Median :150.8  
# Mean   : 82.68   Mean   :16.07   Mean   :11.77   Mean   :150.6  
# 3rd Qu.: 90.27   3rd Qu.:17.55   3rd Qu.:11.91   3rd Qu.:155.7  
# Max.   :115.25   Max.   :22.63   Max.   :12.47   Max.   :170.9  
#   ht.sit.11         ws.11           hp.11            wt.11      
# Min.   :65.83   Min.   :39.14   Min.   : 57.88   Min.   :15.48  
# 1st Qu.:75.24   1st Qu.:60.80   1st Qu.: 75.34   1st Qu.:36.56  
# Median :77.88   Median :67.36   Median : 80.62   Median :42.66  
# Mean   :77.99   Mean   :67.63   Mean   : 80.97   Mean   :43.09  
# 3rd Qu.:80.80   3rd Qu.:74.38   3rd Qu.: 86.84   3rd Qu.:49.95  
# Max.   :89.86   Max.   :92.21   Max.   :106.25   Max.   :74.01  
#     sbp.11           dbp.11         pulse.11          BMI.11      
# Min.   : 77.41   Min.   :35.92   Min.   : 42.65   Min.   : 8.029  
# 1st Qu.:100.35   1st Qu.:55.10   1st Qu.: 67.19   1st Qu.:16.445  
# Median :105.96   Median :59.30   Median : 74.96   Median :18.925  
# Mean   :106.18   Mean   :59.39   Mean   : 74.81   Mean   :18.830  
# 3rd Qu.:112.93   3rd Qu.:63.97   3rd Qu.: 82.72   3rd Qu.:21.325  
# Max.   :130.91   Max.   :77.36   Max.   :103.35   Max.   :31.142  

# Same per-column summary for the female participants
summary(subset.female)

#> summary(subset.female)
#      male     age.yrs.7          ht.7          ht.sit.7          ws.7      
# Min.   :0   Min.   :6.313   Min.   :111.6   Min.   :58.55   Min.   :39.31  
# 1st Qu.:0   1st Qu.:7.355   1st Qu.:122.2   1st Qu.:66.02   1st Qu.:53.47  
# Median :0   Median :7.557   Median :126.1   Median :68.23   Median :56.84  
# Mean   :0   Mean   :7.565   Mean   :126.0   Mean   :68.12   Mean   :56.67  
# 3rd Qu.:0   3rd Qu.:7.789   3rd Qu.:130.1   3rd Qu.:70.12   3rd Qu.:60.19  
# Max.   :0   Max.   :8.439   Max.   :141.9   Max.   :76.28   Max.   :72.36  
#      hp.7            wt.7           sbp.7            dbp.7      
# Min.   :51.12   Min.   :13.14   Min.   : 66.38   Min.   :37.35  
# 1st Qu.:61.98   1st Qu.:23.23   1st Qu.: 93.36   1st Qu.:52.58  
# Median :65.67   Median :26.05   Median :100.32   Median :56.94  
# Mean   :65.73   Mean   :26.07   Mean   :100.08   Mean   :57.37  
# 3rd Qu.:69.34   3rd Qu.:29.10   3rd Qu.:106.29   3rd Qu.:62.10  
# Max.   :82.99   Max.   :40.59   Max.   :130.58   Max.   :79.54  
#    pulse.7           BMI.7          age.yrs.11        ht.11      
# Min.   : 48.00   Min.   : 8.804   Min.   :10.92   Min.   :130.3  
# 1st Qu.: 74.35   1st Qu.:15.053   1st Qu.:11.61   1st Qu.:145.8  
# Median : 81.94   Median :16.369   Median :11.78   Median :150.9  
# Mean   : 81.91   Mean   :16.325   Mean   :11.78   Mean   :150.9  
# 3rd Qu.: 89.48   3rd Qu.:17.748   3rd Qu.:11.94   3rd Qu.:155.8  
# Max.   :118.29   Max.   :23.159   Max.   :12.50   Max.   :174.0  
#   ht.sit.11         ws.11           hp.11            wt.11      
# Min.   :66.18   Min.   :37.76   Min.   : 56.78   Min.   :15.19  
# 1st Qu.:75.79   1st Qu.:62.76   1st Qu.: 76.00   1st Qu.:37.58  
# Median :78.34   Median :69.03   Median : 81.13   Median :43.88  
# Mean   :78.26   Mean   :68.75   Mean   : 81.59   Mean   :44.07  
# 3rd Qu.:80.81   3rd Qu.:74.96   3rd Qu.: 87.22   3rd Qu.:50.49  
# Max.   :88.11   Max.   :94.59   Max.   :109.57   Max.   :79.40  
#     sbp.11           dbp.11         pulse.11          BMI.11      
# Min.   : 75.10   Min.   :35.51   Min.   : 41.45   Min.   : 7.649  
# 1st Qu.: 99.05   1st Qu.:54.49   1st Qu.: 68.09   1st Qu.:17.051  
# Median :106.27   Median :59.08   Median : 75.92   Median :19.313  
# Mean   :106.13   Mean   :59.21   Mean   : 75.71   Mean   :19.225  
# 3rd Qu.:113.08   3rd Qu.:64.23   3rd Qu.: 83.34   3rd Qu.:21.563  
# Max.   :140.20   Max.   :80.02   Max.   :103.03   Max.   :30.379  

# Box plot of BMI at age 7, one box per value of the male indicator,
# written to boxplot.png in the working directory

png(filename = "boxplot.png")
boxplot(BMI.7 ~ male, data = sim.alspac)
# Close the PNG device so the file is flushed to disk
dev.off()

#histograms of BMI age 7 for male and female, overlaid in one PNG

png("histogram.png")
# Give an explicit title and axis label: the defaults would be built from the
# first call only ("subset.female$BMI.7") although both groups are drawn.
hist(subset.female$BMI.7, border = "blue",
     main = "BMI at age 7 by sex", xlab = "BMI.7")
# add = TRUE draws on top of the existing plot. TRUE is spelled out because
# T is an ordinary variable that can be reassigned.
hist(subset.male$BMI.7, border = "green", add = TRUE)
legend("topleft", legend = c("Females", "Males"), lty = 1,
       col = c("blue", "green"))
# Close the PNG device so the file is flushed to disk
dev.off()

#scatter plot of weight against height at age 7 (male participants),
#with the fitted least-squares line

# Use data = instead of subset.male$... inside the formula: the coefficient is
# then named "ht.7", predict() can accept newdata, and the axis labels are the
# plain column names. The fitted values are unchanged.
lm1 <- lm(wt.7 ~ ht.7, data = subset.male)
plot(wt.7 ~ ht.7, data = subset.male)
abline(lm1)

###########################################################
# regression examples
###########################################################

# glm() with its default family (gaussian). Variables are looked up in
# sim.alspac through data =, so coefficients are labelled by column name
# ("male", "ht.7") rather than "sim.alspac$male"; the estimates are unchanged.

# BMI at age 7 by sex
glm(formula = BMI.7 ~ male, data = sim.alspac)
# Weight at age 7 on height at age 7, adjusted for sex
glm(formula = wt.7 ~ ht.7 + male, data = sim.alspac)

# DataSHIELD Wiki by DataSHIELD is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License. Based on a work at http://www.datashield.ac.uk/wiki