Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/epic_dem_check.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 3.0 KB
Line 
1/* epic_dem_check -- Validate demographics ETL.
2
3Copyright (c) 2014 University of Kansas Medical Center
4part of the HERON* open source codebase; see NOTICE file for license details.
5* http://informatics.kumc.edu/work/wiki/HERON
6
7For an overview, see http://informatics.kumc.edu/work/wiki/HeronLoad
8*/
9
10/* Check that we're connected to the identified datamart. */
11select * from NightHeronData.observation_fact where 1 = 0;
12
13
14/* Before we start loading, double-check the results of the view queries. */
15
16/* We should know the age of almost all patients.
17  TODO: find out why we have ~22K patient records with no birthdate.
18*/
19select case when coverage > (95/100) then 1 else 1/0 end test_result from (
20  select (
21     select count(*)
22     from observation_fact_demo nof
23     where nof.concept_cd like 'DEM|AGE:%' ) / (
24     select count(*)
25     from patient_dimension npd) as coverage
26     from dual
27);
28
29select case count(*) when 0 then 1 else 1/0 end as test_result from (
30  -- test for missing start_date
31  select * from observation_fact_demo where start_date is null
32);
33
34
35/* check start_date of death observations #639
36When we used sysdate, there were only as many distinct
37start dates as chunks (i.e. 5). There should be lots of different ones.
38*/
39select case when different_death_dates > 5
40  then 1
41  else 1/0 end  as more_death_dates_than_chunks from (
42select count(distinct f.start_date) different_death_dates
43from observation_fact_vital f
44where concept_cd = 'DEM|VITAL:y'
45and rownum < 10000
46);
47
48/* #637: We were missing race on 100k to 200k patients out of 1.8M */
49select case when other_race / patient_qty > (50000 / 1800000) -- ~3%
50  then 1/0
51  else 1 end as other_race_small_enough from (
52select
53 (select count(*) from patient_dimension where race_cd = 'o') as other_race,
54 (select count(*) from patient_dimension) as patient_qty
55 from dual
56 );
57
58
59/* Check that all patients are accounted for. */
60select case mismatch when 0 then 1 else 1/0 end all_patients_mapped from (
61  select count(*) mismatch from (
62    with epic_maps as (
63      select * from NightHeronData.patient_mapping pm
64      where pm.patient_ide_source in (select source_cd from epic_audit_info)
65      )
66    select * from clarity.patient p
67    left join epic_maps em on p.pat_id = em.patient_ide
68    left join NightHeronData.patient_dimension pd
69           on em.patient_num = pd.patient_num
70    where pd.patient_num is null
71    and em.patient_ide not in (select pat_id from epic_test_patients)
72   )
73  );
74
75/* All dead people should have a death date. */
76select case mismatches when 0 then 1 else 1/0 end test_result from (
77select count(*) mismatches from (
78select *
79from NightHerondata.patient_dimension
80where VITAL_STATUS_CD = 'Y'
81  and death_date is null
82));
83
84/* No null language codes . see #180, #476 */
85select case bad_records when 0 then 1 else 1/0 end test_result from (
86select count(*) bad_records from (
87select *
88from NightHerondata.patient_dimension
89where ((LANGUAGE_CD is null) or (sex_cd is null) or (race_cd is null) or (marital_status_cd is null) or (religion_cd is null))
90));
Note: See TracBrowser for help on using the repository browser.