Data preparation

Data are cleaned and saved in RData format for the subsequent analyses.

# Step 1: load the original data
# Open the SQLite database and pull the variant table into memory. The reads
# are wrapped in tryCatch(..., finally = ) so the connection is closed even
# when listing or reading the tables fails.
con <- dbConnect(SQLite(), dbname = "VariantSCN5A-third-revision.db")
my.data <- tryCatch(
  {
    alltables <- dbListTables(con) # names of all tables in the database
    dbReadTable(con, "VariantSCN5A")
  },
  finally = dbDisconnect(con)
)
# Cells holding the literal string "NA" are recoded as real missing values.
my.data[my.data == "NA"] <- NA
# `data` is the working copy modified by the cleaning steps below; `my.data`
# keeps the table as loaded (after the "NA" recoding).
data <- my.data

# Quick look at the loaded table: number of rows/columns and column names.
c(nrow(data), ncol(data)) # 2417 by 29
colnames(data)

# clean `resnum`
# Tabulate the raw values to see which entries are not plain numbers.
table(data$resnum)
# Missing values present before conversion. The original note here reads
# "51 * + 1246 missing = 1297 abnormal"; that total presumably refers to the
# NA count after the integer conversion below -- TODO confirm.
sum(is.na(data$resnum))
# Count blank entries. na.rm = TRUE keeps the count defined when the column
# also holds NA (comparing NA with "" yields NA, which would make sum() NA).
sum(data$resnum == "", na.rm = TRUE) # 1246 missing

# convert strings to numeric values
# Entries that cannot be parsed as integers become NA; the coercion warning
# is expected here and is therefore silenced for this single call only.
data$resnum <- suppressWarnings(as.integer(data$resnum))
sum(is.na(da