Skip to contents

# Point the `avoncap.input` option at the data input directory
# (this is the location used on my development machine).
options(avoncap.input = "~/Data/avoncap/")

Data loading

The raw data is expected to be in the `nhs-extract` subfolder.

# Load the pneumococcal data from the "nhs-extract" subfolder.
rawPn <- avoncap::load_data("nhs-extract", "pneumococcal")
#> caching item: ~/.cache/avoncap/data-6c0a3f301ee14020e3907a7472c55225-e7f5def26af13823d6fa3194a5398ec4.rda
#> Loaded 3719 rows from 1 files, (3719=3719)

Data normalisation


# Normalise the raw extract; the log that follows lists each source column
# and the standardised name it is mapped to.
normPn <- rawPn %>%
  normalise_data()
#> Normalising data using: normalise.nhs_extract.pneumococcal
#> Some columns expected in the mappings were not present: hb
#> caching item: ~/.cache/avoncap/norm-0ef4812a2d1bc751062531c384a2b89e-7034f315d929e4b9b805334ae0e9dcc8.rda
#> mapping .participant_number to admin.record_number
#> mapping .hospital to admin.hospital
#> mapping .nhs_number to admin.patient_identifier
#> mapping .age_at_admission to demog.age
#> mapping .sex to demog.gender
#> mapping .test_date to pneumo.test_date
#> mapping .test to pneumo.test_type
#> mapping .serotype to pneumo.phe_serotype
#> mapping .smoker to demog.smoker
#> mapping .resp_disease___1 to comorbid.no_resp_dx
#> mapping .resp_disease___2 to comorbid.copd
#> mapping .resp_disease___3 to comorbid.asthma
#> mapping .resp_disease___4 to comorbid.bronchiectasis
#> mapping .resp_disease___5 to comorbid.pulmonary_fibrosis
#> mapping .resp_disease___6 to comorbid.resp_other
#> mapping .chd___1 to comorbid.no_heart_dx
#> mapping .chd___2 to comorbid.ccf
#> mapping .chd___3 to comorbid.ihd
#> mapping .chd___4 to comorbid.hypertension
#> mapping .chd___5 to comorbid.af
#> mapping .chd___6 to comorbid.other_heart_dx
#> mapping .mi to comorbid.previous_mi
#> mapping .ckd to comorbid.ckd
#> mapping .liver_disease to comorbid.liver_disease
#> mapping .diabetes to comorbid.diabetes
#> mapping .dm_meds to comorbid.diabetes_medications
#> mapping .dementia___1 to comorbid.no_dementia
#> mapping .dementia___2 to comorbid.dementia
#> mapping .dementia___3 to comorbid.cognitive_impairment
#> mapping .neurological_disease___1 to comorbid.neuro_other
#> mapping .neurological_disease___2 to comorbid.cva
#> mapping .neurological_disease___3 to comorbid.tia
#> mapping .neurological_disease___4 to comorbid.hemiplegia
#> mapping .neurological_disease___5 to comorbid.paraplegia
#> mapping .neurological_disease___6 to comorbid.no_neuro_dx
#> mapping .gastric_ulcers to comorbid.gastric_ulcers
#> mapping .dysphagia to comorbid.dysphagia
#> mapping .pvd to comorbid.periph_vasc_dx
#> mapping .ctd to comorbid.connective_tissue_dx
#> mapping .immunodeficiency to comorbid.immunodeficiency
#> mapping .other_pn_disease to comorbid.other_pneumococcal_risks
#> mapping .hiv___1 to comorbid.no_HIV
#> mapping .hiv___2 to comorbid.HIV
#> mapping .hiv___3 to comorbid.AIDS
#> mapping .cancer to comorbid.solid_cancer
#> mapping .haem_malig___1 to comorbid.no_haemotological_cancer
#> mapping .haem_malig___2 to comorbid.leukaemia
#> mapping .haem_malig___3 to comorbid.lymphoma
#> mapping .recent_chemo to comorbid.recent_chemotherapy
#> mapping .recent_radiotherapy to comorbid.recent_radiotherapy
#> mapping .transplant to comorbid.transplant_recipient
#> mapping .pregnancy to comorbid.pregnancy
#> mapping .drugs___1 to demog.no_drug_abuse
#> mapping .drugs___2 to demog.alcohol_abuse
#> mapping .drugs___3 to demog.ivdu_abuse
#> mapping .drugs___4 to demog.marijuana_abuse
#> mapping .drugs___5 to demog.other_inhaled_drug_abuse
#> mapping .immsup to admission.on_immunosuppression
#> mapping .weight_problem to comorbid.bmi_status
#> mapping .concomittant_flu to comorbid.influenza_infection
#> mapping .hcv to comorbid.hepatitis_c
#> mapping .ppv23 to vaccination.ppv23_vaccination
#> mapping .flu_vaccine to vaccination.flu
#> mapping .cci_total_score to admission.charlson_comorbidity_index
#> mapping .los_days to outcome.length_of_stay
#> mapping .amts to admission.triage_score
#> mapping .resp_rate to admission.respiratory_rate
#> mapping .sats_ra to admission.saturations_on_room_air
#> mapping .systolic_bp to admission.systolic_bp
#> mapping .diastolic_bp to admission.diastolic_bp
#> mapping .crb65_score to admission.crb_65_severity_score
#> mapping .curb65_score to admission.curb_65_severity_score
#> mapping .antibiotic_route to outcome.antibiotic_route
#> mapping .antibiotic_days to outcome.antibiotic_duration
#> mapping .infection_site to admission.infection_site
#> mapping .deranged_lfts to outcome.abnormal_lft
#> mapping .aki to outcome.acute_kidney_injury
#> mapping .pleural_effusion to outcome.pleural_effusion
#> mapping .empyema to outcome.empyema
#> mapping .discharge_destination to outcome.discharge_to
#> mapping .icu to outcome.admitted_icu
#> mapping .niv to outcome.non_invasive_ventilation
#> mapping .intubation to outcome.intubation
#> mapping .recurrent_pneumonia to outcome.recurrent_pneumonia
#> mapping .ecmo to outcome.received_ecmo
#> mapping .inotropes to outcome.received_ionotropes
#> mapping .trachy to outcome.tracheostomy
#> mapping .inpatient_death to outcome.inpatient_death
#> mapping .death_30days to outcome.death_within_30_days
#> mapping .death_1year to outcome.death_within_1_year
#> mapping .survival_days to outcome.survival_duration
#> mapping .albumin to haem.albumin
#> mapping .wcc to haem.white_cell_count
#> the input data set does not have a hb column (or column set)
#> mapping .pmn to haem.neutrophils
#> mapping .lymphocytes to haem.lymphocytes
#> mapping .crp to haem.crp
#> mapping .na_result to haem.sodium
#> mapping .ur_result to haem.urea
#> mapping .egfr to haem.egfr
#> mapping .creatinine to haem.creatinine
#> mapping .cxr_sides to radio.cxr_infection
#> mapping .cxr_lobes to radio.cxr_lobar_changes
#> mapping .death_5year to outcome.death_within_5_years
#> mapping .survival_days_2 to outcome.5_yr_survival_duration
#> mapping .imd_decile to demog.imd_decile
#> Mapped 112 columns
#> Did not map 69 columns

Augment the data

# Augment the normalised data; the log that follows lists the columns created
# and the existing columns they were created from.
augPn <- normPn %>%
  augment_data()
#> Augmenting data using: augment.nhs_extract.pneumococcal
#> caching item: ~/.cache/avoncap/augment-4c0c0c577381837fb7b8ee36b440288a-68eb508126cbbbc616b66256bddcb267.rda
#> Created comorbid.chronic_pleural_dx, comorbid.interstitial_lung_dx, comorbid.cystic_fibrosis, comorbid.other_chronic_resp_dx, comorbid.pulmonary_hypertension, comorbid.congenital_heart_dx, comorbid.other_arrythmia, comorbid.other_other_heart_dx, demog.care_home_resident using:
#> Created pneumo.vaccine_group, pneumo.serotype_status, pneumo.pcv_group using: pneumo.phe_serotype
#> Created pneumo.pre_covid, pneumo.pandemic_period, pneumo.pcv_vaccine_period using:
#> Created pneumo.invasive_status, pneumo.test_category using: admission.infection_site, pneumo.test_type
#> Created demog.age_category, demog.age_eligible, admission.cci_category, admission.curb_65_category using: demog.age, admission.charlson_comorbidity_index, admission.curb_65_severity_score
#> Created pneumo.clinical_syndrome using: admission.infection_site, outcome.pleural_effusion, outcome.empyema
#> Created survival.length_of_stay, survival.uncensored_time_to_death, survival.last_observed_event using: pneumo.test_date, outcome.length_of_stay, outcome.survival_duration, outcome.5_yr_survival_duration
#> Created survival.length_of_stay_category, survival.30_day_death_time, survival.30_day_death_event, survival.1_yr_death_time, survival.1_yr_death_event, survival.30_day_discharge_time, survival.30_day_discharge_event using:
#> Created survival.length_of_stay_quintile using:
#> Created comorbid.diabetes_type, comorbid.solid_cancer_present, comorbid.haemotological_cancer_present, comorbid.any_cancer_present, comorbid.any_chronic_lung_disease, comorbid.any_chronic_heart_disease, comorbid.cva_or_tia, comorbid.any_immune_compromise using: comorbid.diabetes, comorbid.solid_cancer, comorbid.leukaemia, comorbid.lymphoma, comorbid.no_haemotological_cancer, comorbid.asthma, comorbid.bronchiectasis, comorbid.copd, comorbid.ccf, comorbid.ihd, comorbid.previous_mi, comorbid.hypertension, comorbid.af, comorbid.other_heart_dx, comorbid.cva, comorbid.tia, comorbid.immunodeficiency, admission.on_immunosuppression
#> Created admission.pneumococcal_high_risk using: demog.age, comorbid.other_pneumococcal_risks, comorbid.copd, comorbid.hypertension, comorbid.ccf, comorbid.ihd, comorbid.ckd, comorbid.liver_disease, comorbid.diabetes, comorbid.immunodeficiency, comorbid.asthma, admission.on_immunosuppression
#> Created admission.pneumococcal_risk_classification using: comorbid.cva, comorbid.immunodeficiency, comorbid.leukaemia, comorbid.lymphoma, comorbid.no_haemotological_cancer, admission.on_immunosuppression, comorbid.transplant_recipient, comorbid.ckd, comorbid.HIV, comorbid.solid_cancer, demog.smoker, demog.age, demog.alcohol_abuse, comorbid.diabetes, comorbid.diabetes_medications, comorbid.paraplegia, comorbid.copd, comorbid.asthma, comorbid.liver_disease, comorbid.ccf, comorbid.ihd, comorbid.other_pneumococcal_risks
#> Created  using:

Augmented IPD dataset format:

  • admin.record_number: numeric
  • admin.hospital: factor
  • admin.patient_identifier: character
  • demog.age: numeric
  • demog.gender: factor
  • pneumo.test_date: Date
  • pneumo.test_type: factor
  • pneumo.phe_serotype: factor
  • demog.smoker: factor
  • comorbid.no_resp_dx: factor
  • comorbid.copd: factor
  • comorbid.asthma: factor
  • comorbid.bronchiectasis: factor
  • comorbid.pulmonary_fibrosis: factor
  • comorbid.resp_other: factor
  • comorbid.no_heart_dx: factor
  • comorbid.ccf: factor
  • comorbid.ihd: factor
  • comorbid.hypertension: factor
  • comorbid.af: factor
  • comorbid.other_heart_dx: factor
  • comorbid.previous_mi: factor
  • comorbid.ckd: factor
  • comorbid.liver_disease: factor
  • comorbid.diabetes: factor
  • comorbid.diabetes_medications: factor
  • comorbid.no_dementia: factor
  • comorbid.dementia: factor
  • comorbid.cognitive_impairment: factor
  • comorbid.neuro_other: factor
  • comorbid.cva: factor
  • comorbid.tia: factor
  • comorbid.hemiplegia: factor
  • comorbid.paraplegia: factor
  • comorbid.no_neuro_dx: factor
  • comorbid.gastric_ulcers: factor
  • comorbid.dysphagia: factor
  • comorbid.periph_vasc_dx: factor
  • comorbid.connective_tissue_dx: factor
  • comorbid.immunodeficiency: factor
  • comorbid.other_pneumococcal_risks: factor
  • comorbid.no_HIV: factor
  • comorbid.HIV: factor
  • comorbid.AIDS: factor
  • comorbid.solid_cancer: factor
  • comorbid.no_haemotological_cancer: factor
  • comorbid.leukaemia: factor
  • comorbid.lymphoma: factor
  • comorbid.recent_chemotherapy: factor
  • comorbid.recent_radiotherapy: factor
  • comorbid.transplant_recipient: factor
  • comorbid.pregnancy: factor
  • demog.no_drug_abuse: factor
  • demog.alcohol_abuse: factor
  • demog.ivdu_abuse: factor
  • demog.marijuana_abuse: factor
  • demog.other_inhaled_drug_abuse: factor
  • admission.on_immunosuppression: factor
  • comorbid.bmi_status: factor
  • comorbid.influenza_infection: factor
  • comorbid.hepatitis_c: factor
  • vaccination.ppv23_vaccination: factor
  • vaccination.flu: factor
  • admission.charlson_comorbidity_index: numeric
  • outcome.length_of_stay: numeric
  • admission.triage_score: factor
  • admission.respiratory_rate: numeric
  • admission.saturations_on_room_air: numeric
  • admission.systolic_bp: numeric
  • admission.diastolic_bp: numeric
  • admission.crb_65_severity_score: ordered
  • admission.curb_65_severity_score: ordered
  • outcome.antibiotic_route: factor
  • outcome.antibiotic_duration: numeric
  • admission.infection_site: factor
  • outcome.abnormal_lft: factor
  • outcome.acute_kidney_injury: factor
  • outcome.pleural_effusion: factor
  • outcome.empyema: factor
  • outcome.discharge_to: factor
  • outcome.admitted_icu: factor
  • outcome.non_invasive_ventilation: factor
  • outcome.intubation: factor
  • outcome.recurrent_pneumonia: factor
  • outcome.received_ecmo: factor
  • outcome.received_ionotropes: factor
  • outcome.tracheostomy: factor
  • outcome.inpatient_death: factor
  • outcome.death_within_30_days: factor
  • outcome.death_within_1_year: factor
  • outcome.survival_duration: numeric
  • haem.albumin: numeric
  • haem.white_cell_count: numeric
  • haem.neutrophils: numeric
  • haem.lymphocytes: numeric
  • haem.crp: numeric
  • haem.sodium: numeric
  • haem.urea: numeric
  • haem.egfr: numeric
  • haem.creatinine: numeric
  • radio.cxr_infection: factor
  • radio.cxr_lobar_changes: factor
  • outcome.death_within_5_years: factor
  • outcome.5_yr_survival_duration: numeric
  • demog.imd_decile: character
  • key.admit: character
# Cross-tabulate infection site against pneumococcal test type, always
# including the <NA> row and column.
augPn %>%
  with(
    table(
      admission.infection_site,
      pneumo.test_type,
      useNA = "always"
    )
  )
#>                         pneumo.test_type
#> admission.infection_site Blood culture only Binax only Blood culture and Binax
#>         Lung                           1153       2017                     266
#>         Meningitis                       90         46                      23
#>         Septic arthritis                 27          5                       4
#>         ENT                              14          2                       0
#>         Myositis                          1          0                       0
#>         Otitis externa                    1          0                       0
#>         Abdominal                         4          1                       1
#>         Unclear/unknown                   3          0                       0
#>         Other                            32         13                       1
#>         <NA>                              0          0                       0
#>                         pneumo.test_type
#> admission.infection_site CSF PCR Blood PCR <NA>
#>         Lung                   0         0    0
#>         Meningitis             8         5    0
#>         Septic arthritis       0         0    0
#>         ENT                    0         0    0
#>         Myositis               0         0    0
#>         Otitis externa         0         0    0
#>         Abdominal              0         0    0
#>         Unclear/unknown        0         0    0
#>         Other                  0         2    0
#>         <NA>                   0         0    0