Extended R Practical: Answers
###########################################
# Rebecca Wilson Newcastle University
# Intro to R course, extended practical
# data: sim alspac
###########################################
#
# Walk-through answers for the extended practical: load the simulated ALSPAC
# data, split it by the `male` indicator, summarise each subset, draw a few
# plots and fit simple regressions. Lines starting with "# " directly after a
# command show the output recorded when the practical was written.

# set the working directory
# (replace the placeholder with the folder that contains alspac.csv)
setwd("XXXXXXXXXXXXXXXX/XXXXXXXXXXX")

# read in the alspac simulated data
sim.alspac <- read.csv("alspac.csv")

# what variables are present?
colnames(sim.alspac)
#  [1] "male"       "age.yrs.7"  "ht.7"       "ht.sit.7"   "ws.7"
#  [6] "hp.7"       "wt.7"       "sbp.7"      "dbp.7"      "pulse.7"
# [11] "BMI.7"      "age.yrs.11" "ht.11"      "ht.sit.11"  "ws.11"
# [16] "hp.11"      "wt.11"      "sbp.11"     "dbp.11"     "pulse.11"
# [21] "BMI.11"

# number of participants
dim(sim.alspac)
# [1] 1000   21
# 1000 participants
# 21 variables

# selecting and subsetting data
# split by gender
subset.male <- subset(sim.alspac, male == 1)
subset.female <- subset(sim.alspac, male == 0)

# find out the number of male and female participants
dim(subset.male)
# [1] 478  21
dim(subset.female)
# [1] 522  21

#######################################################
# Exploring the data
#######################################################

# summaries by gender
summary(subset.male)
# > summary(subset.male)
#     male      age.yrs.7        ht.7          ht.sit.7        ws.7
#  Min.   :1  Min.   :6.617  Min.   :110.7  Min.   :59.65  Min.   :43.99
#  1st Qu.:1  1st Qu.:7.352  1st Qu.:121.7  1st Qu.:65.90  1st Qu.:52.15
#  Median :1  Median :7.556  Median :125.3  Median :67.79  Median :55.90
#  Mean   :1  Mean   :7.550  Mean   :125.5  Mean   :67.78  Mean   :55.99
#  3rd Qu.:1  3rd Qu.:7.749  3rd Qu.:129.7  3rd Qu.:69.61  3rd Qu.:59.35
#  Max.   :1  Max.   :8.683  Max.   :144.2  Max.   :75.93  Max.   :73.30
#      hp.7            wt.7           sbp.7           dbp.7
#  Min.   :51.62  Min.   :14.88  Min.   : 62.55  Min.   :33.48
#  1st Qu.:61.59  1st Qu.:22.04  1st Qu.: 93.85  1st Qu.:52.51
#  Median :64.76  Median :25.36  Median : 99.70  Median :57.19
#  Mean   :65.09  Mean   :25.47  Mean   : 99.96  Mean   :57.13
#  3rd Qu.:68.58  3rd Qu.:28.69  3rd Qu.:106.79  3rd Qu.:62.24
#  Max.   :83.31  Max.   :42.49  Max.   :127.28  Max.   :78.84
#     pulse.7          BMI.7        age.yrs.11        ht.11
#  Min.   : 47.89  Min.   :10.46  Min.   :11.07  Min.   :128.0
#  1st Qu.: 75.63  1st Qu.:14.66  1st Qu.:11.63  1st Qu.:145.7
#  Median : 82.12  Median :16.09  Median :11.77  Median :150.8
#  Mean   : 82.68  Mean   :16.07  Mean   :11.77  Mean   :150.6
#  3rd Qu.: 90.27  3rd Qu.:17.55  3rd Qu.:11.91  3rd Qu.:155.7
#  Max.   :115.25  Max.   :22.63  Max.   :12.47  Max.   :170.9
#    ht.sit.11         ws.11          hp.11           wt.11
#  Min.   :65.83  Min.   :39.14  Min.   : 57.88  Min.   :15.48
#  1st Qu.:75.24  1st Qu.:60.80  1st Qu.: 75.34  1st Qu.:36.56
#  Median :77.88  Median :67.36  Median : 80.62  Median :42.66
#  Mean   :77.99  Mean   :67.63  Mean   : 80.97  Mean   :43.09
#  3rd Qu.:80.80  3rd Qu.:74.38  3rd Qu.: 86.84  3rd Qu.:49.95
#  Max.   :89.86  Max.   :92.21  Max.   :106.25  Max.   :74.01
#     sbp.11          dbp.11         pulse.11          BMI.11
#  Min.   : 77.41  Min.   :35.92  Min.   : 42.65  Min.   : 8.029
#  1st Qu.:100.35  1st Qu.:55.10  1st Qu.: 67.19  1st Qu.:16.445
#  Median :105.96  Median :59.30  Median : 74.96  Median :18.925
#  Mean   :106.18  Mean   :59.39  Mean   : 74.81  Mean   :18.830
#  3rd Qu.:112.93  3rd Qu.:63.97  3rd Qu.: 82.72  3rd Qu.:21.325
#  Max.   :130.91  Max.   :77.36  Max.   :103.35  Max.   :31.142

summary(subset.female)
# > summary(subset.female)
#     male      age.yrs.7        ht.7          ht.sit.7        ws.7
#  Min.   :0  Min.   :6.313  Min.   :111.6  Min.   :58.55  Min.   :39.31
#  1st Qu.:0  1st Qu.:7.355  1st Qu.:122.2  1st Qu.:66.02  1st Qu.:53.47
#  Median :0  Median :7.557  Median :126.1  Median :68.23  Median :56.84
#  Mean   :0  Mean   :7.565  Mean   :126.0  Mean   :68.12  Mean   :56.67
#  3rd Qu.:0  3rd Qu.:7.789  3rd Qu.:130.1  3rd Qu.:70.12  3rd Qu.:60.19
#  Max.   :0  Max.   :8.439  Max.   :141.9  Max.   :76.28  Max.   :72.36
#      hp.7            wt.7           sbp.7           dbp.7
#  Min.   :51.12  Min.   :13.14  Min.   : 66.38  Min.   :37.35
#  1st Qu.:61.98  1st Qu.:23.23  1st Qu.: 93.36  1st Qu.:52.58
#  Median :65.67  Median :26.05  Median :100.32  Median :56.94
#  Mean   :65.73  Mean   :26.07  Mean   :100.08  Mean   :57.37
#  3rd Qu.:69.34  3rd Qu.:29.10  3rd Qu.:106.29  3rd Qu.:62.10
#  Max.   :82.99  Max.   :40.59  Max.   :130.58  Max.   :79.54
#     pulse.7          BMI.7         age.yrs.11        ht.11
#  Min.   : 48.00  Min.   : 8.804  Min.   :10.92  Min.   :130.3
#  1st Qu.: 74.35  1st Qu.:15.053  1st Qu.:11.61  1st Qu.:145.8
#  Median : 81.94  Median :16.369  Median :11.78  Median :150.9
#  Mean   : 81.91  Mean   :16.325  Mean   :11.78  Mean   :150.9
#  3rd Qu.: 89.48  3rd Qu.:17.748  3rd Qu.:11.94  3rd Qu.:155.8
#  Max.   :118.29  Max.   :23.159  Max.   :12.50  Max.   :174.0
#    ht.sit.11         ws.11          hp.11           wt.11
#  Min.   :66.18  Min.   :37.76  Min.   : 56.78  Min.   :15.19
#  1st Qu.:75.79  1st Qu.:62.76  1st Qu.: 76.00  1st Qu.:37.58
#  Median :78.34  Median :69.03  Median : 81.13  Median :43.88
#  Mean   :78.26  Mean   :68.75  Mean   : 81.59  Mean   :44.07
#  3rd Qu.:80.81  3rd Qu.:74.96  3rd Qu.: 87.22  3rd Qu.:50.49
#  Max.   :88.11  Max.   :94.59  Max.   :109.57  Max.   :79.40
#     sbp.11          dbp.11         pulse.11          BMI.11
#  Min.   : 75.10  Min.   :35.51  Min.   : 41.45  Min.   : 7.649
#  1st Qu.: 99.05  1st Qu.:54.49  1st Qu.: 68.09  1st Qu.:17.051
#  Median :106.27  Median :59.08  Median : 75.92  Median :19.313
#  Mean   :106.13  Mean   :59.21  Mean   : 75.71  Mean   :19.225
#  3rd Qu.:113.08  3rd Qu.:64.23  3rd Qu.: 83.34  3rd Qu.:21.563
#  Max.   :140.20  Max.   :80.02  Max.   :103.03  Max.   :30.379

# box plot (written to boxplot.png in the working directory)
png("boxplot.png")
boxplot(BMI.7 ~ male, data = sim.alspac)
dev.off()

# histograms of BMI age 7 for male and female
# (the male histogram is overlaid on the female one via add = TRUE)
png("histogram.png")
hist(subset.female$BMI.7, border = "blue")
hist(subset.male$BMI.7, border = "green", add = TRUE)
legend(
  "topleft",
  legend = c("Females", "Males"),
  lty = 1,
  col = c("blue", "green")
)
dev.off()

# scatter plot of weight against height at age 7 (males), with the fitted
# regression line drawn on the currently active graphics device
lm1 <- lm(wt.7 ~ ht.7, data = subset.male)
plot(subset.male$ht.7, subset.male$wt.7)
abline(lm1)

###########################################################
# regression examples
###########################################################

# Column names are given in the formula and the data frame through `data =`,
# so coefficients are labelled by variable name (e.g. "male").
glm(formula = BMI.7 ~ male, data = sim.alspac)
glm(formula = wt.7 ~ ht.7 + male, data = sim.alspac)
DataSHIELD Wiki by DataSHIELD is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License. Based on a work at http://www.datashield.ac.uk/wiki