Snippets

Denis Drescher Outreach Survey Analyses

Created by Denis Drescher last modified
library(data.table)
library(likert)
library(car)
library(reshape)  # https://github.com/jbryer/likert/issues/26

# http://stackoverflow.com/questions/26561518
`%or%` = function(a, b) {
  cmp = function(a,b) if (identical(a, FALSE) ||
                            is.null(a) ||
                            is.na(a) ||
                            is.nan(a) ||
                            length(a) == 0) b else a
  
  if (length(a) > 1)
    mapply(cmp, a, b)
  else
    cmp(a, b)
}

resultses <- c()
resultses[[1]] <- read.csv('~/documents/university/empiricism/data/Ord and Singer Survey (Antworten) - Formularantworten 1 - 2015-06-20.tsv',
                           sep='\t', na.strings=c(''), as.is=TRUE, strip.white=TRUE)
resultses[[2]] <- read.csv('~/documents/university/empiricism/data/Ord and Singer Survey (Antworten) - Formularantworten 2 - 2015-07-10.tsv',
                           sep='\t', na.strings=c(''), as.is=TRUE, strip.white=TRUE)
results <- data.frame(rbindlist(resultses))

names(results) <- c('timestamp',
                    'rationality.01.rational.positive',
                    'rationality.02.rational.positive',
                    'rationality.03.experiential.positive',
                    'rationality.04.rational.negative',
                    'rationality.05.experiential.positive',
                    'rationality.06.rational.negative',
                    'rationality.07.experiential.positive',
                    'rationality.08.rational.negative',
                    'rationality.09.experiential.negative',
                    'rationality.10.rational.negative',
                    'altruism.01',
                    'altruism.02',
                    'altruism.03',
                    'altruism.04',
                    'altruism.05',
                    'altruism.06',
                    'altruism.07',
                    'altruism.08',
                    'altruism.09',
                    'altruism.10',
                    'career.opportunity.teaser.motivation',
                    'career.opportunity.teaser.curiosity',
                    '1000.opportunity.info.motivation',
                    '1000.opportunity.info.curiosity',
                    '1percent.opportunity.teaser.motivation',
                    '1percent.opportunity.teaser.curiosity',
                    'deathbed.opportunity.teaser.motivation',
                    'deathbed.opportunity.teaser.curiosity',
                    'ironman.opportunity.teaser.motivation',
                    'ironman.opportunity.teaser.curiosity',
                    'pond.obligation.info.motivation',
                    'pond.obligation.info.curiosity',
                    'deathbed.obligation.teaser.motivation',
                    'deathbed.obligation.teaser.curiosity',
                    'blindness.obligation.info.motivation',
                    'blindness.obligation.info.curiosity',
                    'art.obligation.info.motivation',
                    'art.obligation.info.curiosity',
                    'stockmarket.other.info.motivation',
                    'stockmarket.other.info.curiosity',
                    'age',
                    'country',
                    'ea',
                    'religiosity',
                    'politics',
                    'education',
                    'degree',
                    'referrer',
                    'comment')
row.names(results) <- results$timestamp
for (i in 1:10) {
  results[, i+1] <- unname(c('Strongly disagree'=-2, 'Disagree'=-1, 'Neutral'=0, 'Agree'=1, 'Strongly agree'=2)[results[, i+1]])
  results[, i+11] <- unname(c('Never'=-2, 'Once'=-1, 'More than once'=0, 'Often'=1, 'Very often'=2)[results[, i+11]])
}
results$rationality.04.rational.negative <- -1 * results$rationality.04.rational.negative
results$rationality.06.rational.negative <- -1 * results$rationality.06.rational.negative
results$rationality.08.rational.negative <- -1 * results$rationality.08.rational.negative
results$rationality.09.experiential.negative <- -1 * results$rationality.09.experiential.negative
results$rationality.10.rational.negative <- -1 * results$rationality.10.rational.negative
for (i in 1:10) {
  results[, i+1] <- factor(results[, i+1], levels=-2:2, ordered=TRUE)
  results[, i+11] <- factor(results[, i+1], levels=-2:2, ordered=TRUE)
}
for (i in 22:41) {
  results[, i] <- factor(unname(c('Strongly disagree'=-2, 'Disagree'=-1, 'Neutral'=0, 'Agree'=1, 'Strongly agree'=2)[results[, i]]),
                         levels=-2:2, ordered=TRUE)
}

# Evil stuff
rei.scores <- matrix(NA, nrow=length(results[, 1]), ncol=10)
altruism.scores <- matrix(NA, nrow=length(results[, 1]), ncol=10)
for (i in 1:10) {
  rei.scores[, i] <- unname(c('Strongly disagree'=-2, 'Disagree'=-1, 'Neutral'=0, 'Agree'=1, 'Strongly agree'=2)[results[, i+1]])
  altruism.scores[, i] <- unname(c('Never'=-2, 'Once'=-1, 'More than once'=0, 'Often'=1, 'Very often'=2)[results[, i+11]])
}
rationality.scores <- subset(rei.scores, select=c(1, 2, 4, 6, 8, 10))
experience.scores <- subset(rei.scores, select=c(3, 5, 7, 9))
results$rationality.mean <- rowMeans(rationality.scores)
results$experience.mean <- rowMeans(experience.scores)
results$altruism.mean <- rowMeans(altruism.scores)
rationality.tertiles <- quantile(results$rationality.mean, c(1/3, 2/3), na.rm=TRUE)
experience.tertiles <- quantile(results$experience.mean, c(1/3, 2/3), na.rm=TRUE)
altruism.tertiles <- quantile(results$altruism.mean, c(1/3, 2/3), na.rm=TRUE)
# The center partition may end up smaller but this way the partitions stay symmetrical, which is more intuitive
results$rationality.simple[results$rationality.mean < rationality.tertiles[2]] <- 'Medium rational'
results$rationality.simple[results$rationality.mean <= rationality.tertiles[1]] <- 'Little rational'
results$rationality.simple[results$rationality.mean >= rationality.tertiles[2]] <- 'Highly rational'
results$experience.simple[results$experience.mean < experience.tertiles[2]] <- 'Medium experiential'
results$experience.simple[results$experience.mean <= experience.tertiles[1]] <- 'Little experiential'
results$experience.simple[results$experience.mean >= experience.tertiles[2]] <- 'Highly experiential'
results$altruism.simple[results$altruism.mean < altruism.tertiles[2]] <- 'Medium altruistic'
results$altruism.simple[results$altruism.mean <= altruism.tertiles[1]] <- 'Little altruistic'
results$altruism.simple[results$altruism.mean >= altruism.tertiles[2]] <- 'Highly altruistic'
results$rationality.simple <- factor(results$rationality.simple,
                                     levels=c('Little rational', 'Medium rational', 'Highly rational'))
results$experience.simple <- factor(results$experience.simple,
                                    levels=c('Little experiential', 'Medium experiential', 'Highly experiential'))
results$altruism.simple <- factor(results$altruism.simple,
                                  levels=c('Little altruistic', 'Medium altruistic', 'Highly altruistic'))
# End of evil stuff

# Cleaning and simplification
results$ea <- list('I have heard of effective altruism before  this survey.'='Knows of EA',
                   'I have *not* heard of effective altruism before  this survey.'='No EA',
                   'I identify as effective altruist.'='EA')[results$ea]
results$degree <- list('Information technology'='Sciences (including math)',
                       'Classics but in spare time philosophy'='Philosophy',
                       'sports'='Sciences (including math)',
                       'Law'='Social sciences',
                       'sciences / engineering'='Sciences (including math)',
                       'Engineering'='Sciences (including math)',
                       'education'='Social sciences',
                       'Economics'='Social sciences',
                       'One degree in hard science, one in a social science'='Sciences (including math)',
                       'Theatre'='Other humanities',
                       'theology'='Other humanities',
                       'medicine'='Sciences (including math)',
                       'Maths and Philosophy'='Sciences (including math)',
                       'anthropology'='Sciences (including math)',
                       'Birth and early childhood development'='Social sciences',
                       'economics'='Social sciences',
                       'Graphic communications'='Other humanities',
                       'Environmental mgmt'='Other humanities',
                       'Finance'='Social sciences',
                       'horticulture'='Sciences (including math)',
                       'Computer Science'='Sciences (including math)',
                       'Art'='Other humanities')[results$degree] %or% results$degree
results$degree[results$degree == 'shop'] <- NA
results$degree[results$degree == 'none'] <- NA
results$degree[results$degree == 'Electronic Music'] <- NA
results$referrer <- list('channels of Bronies for Good'='Bronies for Good')[results$referrer] %or% results$referrer
results$politics.simple <- list('Communist'='Left', 'Conservative'='Right', 'Green'='Left',
                                'Left authoritarian'='Left', 'Left libertarian'='Left', 'left green'='Left',
                                'Progressive'='Left', 'Right libertarian'='Right', 'Social democrat'='Left',
                                'Socialist'='Left')[results$politics] %or% 'Other'
results$education.simple <- list('I’ve graduated high school'='School',
                                 'I haven’t graduated school'='School')[results$education] %or% 'Higher'
results$age <- as.numeric(results$age)
results$age[results$age > 120] <- NA
results$age.simple[results$age < 15] <- '0–14'
results$age.simple[results$age >= 15 & results$age < 25] <- '15–24'
results$age.simple[results$age >= 25 & results$age < 45] <- '25–44'
results$age.simple[results$age >= 45 & results$age < 65] <- '45–64'
results$age.simple[results$age >= 65] <- '65+'

# Casts
results$country <- as.factor(results$country)
results$politics <- factor(results$politics)
results$degree <- factor(results$degree)
results$referrer <- factor(results$referrer)
results$ea <- factor(results$ea, levels=c('No EA', 'Knows of EA', 'EA'), ordered=TRUE)
results$religiosity <- factor(results$religiosity,
                              levels=c('Agnostic or atheist', 'Haven’t thought about it',
                                       'Sort of religious or spiritual', 'Devoutly religious or spiritual'),
                              ordered=TRUE)
results$religiosity.simple <- factor(list('Agnostic or atheist'='Nonreligious',
                                          'Haven’t thought about it'='Nonreligious',
                                          'Sort of religious or spiritual'='Religious',
                                          'Devoutly religious or spiritual'='Religious'
                                          )[results$religiosity],
                                      levels=c('Nonreligious', 'Religious'),
                                      ordered=TRUE)
results$education <- factor(results$education,
                            levels=c('I haven’t graduated school',
                                     'I’ve graduated high school',
                                     'I’ve been to a university or college',
                                     'I have a university or college degree',
                                     'I have a PhD'),
                            ordered=TRUE)
results$age.simple <- factor(results$age.simple, levels=c('0-14', '15–24', '25–44', '45–64', '65+'), ordered=TRUE)

features <- list(
  'rationality'=subset(results, select=c(2, 3, 5, 7, 9, 11)),
  'experience'=subset(results, select=c(4, 6, 8, 10)),
  'altruism'=subset(results, select=12:21),
  'all'=subset(results, select=22:41),
  'opportunity'=subset(results, select=22:31),
  'obligation'=subset(results, select=32:41),
  'teaser'=subset(results, select=c(22, 23, 26, 27, 28, 29, 30, 31, 34, 35)),
  'info'=subset(results, select=c(24, 25, 32, 33, 36, 37, 38, 39, 40, 41)),
  'motivation'=subset(results, select=seq(22, 40, 2)),
  'curiosity'=subset(results, select=seq(23, 41, 2)))
cohorts <- results[
  c('ea', 'religiosity', 'religiosity.simple',
    'education', 'degree', 'education.simple')]
# Other: c('country', 'politics', 'politics.simple', 'referrer', 'age.simple')

# More evil stuff
for (feature in names(features)) {
  num.matrix <- sapply(features[[feature]], function (row) as.numeric(row) - 3)
  results[[paste(feature, 'mean', sep='.')]] <- rowMeans(num.matrix)
}
means = names(results)[grepl('.mean$', names(results))]

dummy <- likert(features$all)
licorice <- array(data=dummy, dim=c(length(cohorts), length(features)),
                  dimnames=list(names(cohorts), names(features)))
for (feature in names(features)) {
  for (cohort in names(cohorts)) {
    print(sprintf('%s, %s', feature, cohort))
    licorice[[cohort, feature]] <- likert(features[[feature]][which(!is.na(cohorts[[cohort]])),],
                                          grouping=cohorts[[cohort]][!is.na(cohorts[[cohort]])])
  }
}
for (feature in names(features)) {
  print(feature)
  png(sprintf('%s.png', feature), height=90+20*length(features[[feature]]))
  output <- plot(likert(features[[feature]]))
  print(output)
  dev.off()
  for (cohort in names(cohorts)) {
    print(sprintf('    %s', cohort))
    png(sprintf('%s-%s.png', feature, cohort), height=1200)
    output <- plot(licorice[[cohort, feature]], include.histogram=TRUE,
                   group.order=levels(cohorts[[cohort]]))
    print(output)
    dev.off()
  }
}

for (cohort in c('ea', 'education.simple', 'religiosity.simple', 'degree')) {
  molten <- melt(results, cohort, means, rm.na=TRUE)
  png(sprintf('%s.boxplots.png', cohort), width=1200)
  output <- ggplot(data=molten, aes(x=variable, y=value)) + geom_boxplot(aes(fill=molten[[cohort]]))
  output + facet_wrap( ~ variable, scales='free')
  print(output)
  dev.off()
}

for (feature in means[4:10]) {
  png(sprintf('%s.matrix.png', feature), width=800, height=800)
  scatterplotMatrix(~ rationality.mean+experience.mean+altruism.mean+results[[feature]],
                    data=results,
                    var.labels=c('rational', 'experiential', 'altruistic', strsplit(feature, '\\.')[[1]][[1]]))
  dev.off()
}

# Test for first and second hypothesis. Only noise.
molten <- melt(data=results, c(), c('obligation.mean', 'opportunity.mean'), na.rm=TRUE)
print(ttestBF(formula=value ~ variable, data=molten, var.equal=TRUE))

# Test for the seeming education corellation. Only noise.
molten <- melt(data=results, c('education.simple'), c('all.mean'), na.rm=TRUE)
print(ttestBF(formula=value ~ education.simple, data=molten, var.equal=TRUE))

# Test for experiential inclination vs. religiosity. Kind of indicative.
molten <- melt(data=results[!is.na(results$religiosity.simple), ], c('religiosity.simple'), c('experience.mean'), na.rm=TRUE)
print(ttestBF(formula=value ~ religiosity.simple, data=molten, var.equal=TRUE))
png('religiosity.simple.boxplots.png', width=1200)
output <- ggplot(data=molten, aes(x=variable, y=value)) + geom_boxplot(aes(fill=molten$religiosity.simple))
output + facet_wrap( ~ variable, scales='free')
print(output)
dev.off()

Comments (0)