source: heron_analysis/death.Rmd

heron-michigan
Last change on this file was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 2 years ago

Merge with demo_concepts_3800

  • Property exe set to *
File size: 3.5 KB
Line 
Death Signals
=============
In each plot below, the x-axis shows the difference in days between the death dates recorded by the two sources (first source minus second source).
4
5```{r echo=FALSE}
# Load the saved query result object from disk.
q <- readRDS('<full path to .Rda file>')

# Quick inspection of the loaded object; only the concept names are printed.
# names(q)
# q$label
q$concept$name
13
14
# Build a per-patient table of fact dates expressed in days.
#
# Arguments:
#   q     - query result list; q$fact.frame[[ix]] must be a data frame with
#           the columns patient.num, start.date, end.date and code.
#   ix    - index of the fact frame to extract (presumably in the same order
#           as the source strings in q$concept$name -- verify against data).
#   start - TRUE to use start.date, FALSE to use end.date. Defaults to TRUE.
#
# Returns the four selected columns plus `when`: the chosen date passed
# through as.numeric() and divided by the number of seconds in a day.
# NOTE(review): this assumes the date columns are POSIXct (seconds since the
# epoch); a Date column would already be in days -- confirm against the data.
who.when <- function(q, ix, start = TRUE) {
  # Fail with a clear message instead of the cryptic error from `if (NA)`.
  if (length(start) != 1L || is.na(start)) {
    stop("'start' must be a single TRUE or FALSE value", call. = FALSE)
  }
  d <- q$fact.frame[[ix]][, c("patient.num", "start.date", "end.date", "code")]
  seconds.per.day <- 24 * 60 * 60
  fact.date <- if (start == TRUE) d$start.date else d$end.date
  d$when <- as.numeric(fact.date) / seconds.per.day
  d
}
23
24
# Pull each death source out of q by its position in q$fact.frame,
# using the start date of every fact.
# TODO: use names (like 'Hospital') rather than magic numbers
naaccr <- who.when(q, ix = 1, start = TRUE)
hospital <- who.when(q, ix = 2, start = TRUE)
ssa <- who.when(q, ix = 3, start = TRUE)
uhc.medicare_champus <- who.when(q, ix = 4, start = TRUE)
uhc.not_medicare_champus <- who.when(q, ix = 5, start = TRUE)
31
32
33# Make my own histogram plot with log scale
34
# Helper function - log10 of counts, avoiding negative infinity.
#
# Arguments:
#   v - numeric vector of counts (a single value also works).
#
# Returns a numeric vector the same length as v holding log10(v), except
# that entries equal to zero map to 0 rather than -Inf. Note that a count
# of 1 also maps to 0, since log10(1) is 0.
#
# Works element-wise, so it can be applied to a whole vector of histogram
# counts at once, and the result is returned visibly.
log.zero <- function(v) {
  out <- log10(v)
  # which() drops NA comparisons so missing values stay NA instead of erroring.
  out[which(v == 0)] <- 0
  out
}
39
40
# Plot a log-scale histogram of the time deltas between two death sources.
#
# Arguments:
#   death.data1, death.data2 - data frames with at least the columns
#                              patient.num and when (as built by who.when).
#   data1.title, data2.title - labels for the two sources, used in the plot
#                              title and subtitle.
#   min.delta.days.plot      - if > 0, only deltas whose absolute value is
#                              greater than this are included in the plot.
#   min.delta.days.txt       - threshold (in days) for the returned summary.
#
# The two data frames are joined on patient.num and the delta is computed as
# `when` of source 1 minus `when` of source 2. The histogram uses 250 breaks
# and draws log10 of the bin counts (empty bins are drawn at 0 via log.zero).
#
# Returns a one-element character vector stating how many deltas exceed
# min.delta.days.txt in absolute value.
plot.loghist <- function (death.data1, data1.title, death.data2, data2.title, min.delta.days.plot, min.delta.days.txt) {
  case <- merge(death.data1, death.data2, by = "patient.num",
                suffixes = c(".data1", ".data2"))
  deltas <- case$when.data1 - case$when.data2
  if (min.delta.days.plot > 0) {
    deltas <- deltas[!is.na(deltas) & abs(deltas) > min.delta.days.plot]
  }

  title <- paste(data1.title, "MINUS", data2.title)
  if (min.delta.days.plot > 0) {
    title <- paste(title, "Ignoring Deltas <", min.delta.days.plot, "Days")
  }
  s1 <- paste(data1.title, "Facts:", length(death.data1$patient.num))
  s2 <- paste(data2.title, "Facts:", length(death.data2$patient.num))
  # NOTE(review): this is the number of joined rows; when min.delta.days.plot
  # is > 0 fewer values are actually drawn in the histogram.
  s3 <- paste("Patients in both data sets (number included in histogram):", length(case$patient.num))
  subtitle <- paste(s1, s2, s3, sep = "\n")

  # hist() raises an error when there is nothing finite to bin (no shared
  # patients, or everything filtered out); skip the plot in that case so the
  # rest of the report can still be produced.
  if (any(is.finite(deltas))) {
    h <- hist(deltas, breaks = 250, plot = FALSE)
    log.counts <- vapply(h$counts, log.zero, numeric(1))
    plot(h$mids, log.counts, type = "h", main = title, xlab = "",
         ylab = "log10(counts)", sub = subtitle)
  }

  big.deltas <- deltas[!is.na(deltas) & abs(deltas) > min.delta.days.txt]
  paste('In the above,', length(big.deltas), 'death dates differ by more than', min.delta.days.txt, 'days.')
}
58
59# Test code for looking at the biggest deltas
# Merge the two on patient number
61# case <- merge(hospital, naaccr, by=c('patient.num'), suffixes = c(".hosp",".naaccr"))
62# create a column of diffs
63# case$diff <- abs(case$when.hosp - case$when.naaccr)
64# sort by diff
65# sorted_case <- case[order(case$diff, decreasing=TRUE),]
66# sorted <- sorted_case[1:10,] # look at first 10 again
67
68
# Pairwise comparison of the death sources.
# Each call stays a separate top-level expression so that the summary
# sentence it returns is printed in the report.

# Deltas smaller than this many days are left out of the plot.
ignore.delta.lt.plot <- 0
# The report text counts death records differing by more than this many days.
ignore.delta.lt.txt <- 30

plot.loghist(hospital, "Hospital Death Date",
             ssa, "SSA Death Date",
             min.delta.days.plot = ignore.delta.lt.plot,
             min.delta.days.txt = ignore.delta.lt.txt)
plot.loghist(hospital, "Hospital Death Date",
             naaccr, "NAACCR Death Date",
             min.delta.days.plot = ignore.delta.lt.plot,
             min.delta.days.txt = ignore.delta.lt.txt)
plot.loghist(hospital, "Hospital Death Date",
             uhc.not_medicare_champus, "UHC Death Date",
             min.delta.days.plot = ignore.delta.lt.plot,
             min.delta.days.txt = ignore.delta.lt.txt)
plot.loghist(ssa, "SSA Death Date",
             naaccr, "NAACCR Death Date",
             min.delta.days.plot = ignore.delta.lt.plot,
             min.delta.days.txt = ignore.delta.lt.txt)
plot.loghist(ssa, "SSA Death Date",
             uhc.not_medicare_champus, "UHC Death Date",
             min.delta.days.plot = ignore.delta.lt.plot,
             min.delta.days.txt = ignore.delta.lt.txt)
plot.loghist(naaccr, "NAACCR Death Date",
             uhc.not_medicare_champus, "UHC Death Date",
             min.delta.days.plot = ignore.delta.lt.plot,
             min.delta.days.txt = ignore.delta.lt.txt)
Note: See TracBrowser for help on using the repository browser.