# Source
#
# leipzig_feedback_2014 / src / plot_evaluations.R
#
# Full commit
#!/usr/bin/env Rscript
# 
# Generate Plots to visualize students' evaluations on the Leipzig workshop

library(ggplot2)
library(reshape)
library(stringr)

# Load the survey export: tab-separated, with a header row, quoting disabled,
# and empty fields read as NA.
feedback <- read.table(
    file = "evaluations2014.csv",
    header = TRUE,
    sep = "\t",
    quote = "",
    na.strings = ""
)
# Alternative reader tried earlier: read.delim() with quote='"'.
print(nrow(feedback))
# Snapshot of the column names as read, taken before any renaming.
orig.names <- names(feedback)

# Column positions (in `feedback`) of each group of questions.
LectureRateIndex      <- 12:26
LectureRelevanceIndex <- 27:41
LectureLengthIndex    <- 42:56
TalksRateIndex        <- 72:75
TalksRelevanceIndex   <- 76:79
TalksLengthIndex      <- 80:83
# Both generic groups start with column 1, which is used as the id column
# ("Timestamp") when the data are melted.
GenericQuestionIndex  <- c(1:4, 6, 9:11)
GenericQuestion2Index <- c(1, 84:91, 93)
# Earlier layouts, kept for reference:
#   GenericQuestionIndex  = c(1:4, 6,7,9, 74:76)
#   GenericQuestion2Index = c(1, 77:78, 79, 83, 85, 87:90)


# Shorten the per-lecture / per-talk column names: each rule keeps only the
# first capture group of its pattern for the columns in its index group.
# Rules are applied in the order listed.
names(feedback) <- local({
    labels <- names(feedback)
    rules <- list(
        # Rate of lectures
        list(index = LectureRateIndex,      pattern = "How.*lectures[.]{4,5}(.*)[.]{2,2}$"),
        # Relevance of lectures
        list(index = LectureRelevanceIndex, pattern = "Do.*work[.]{4,5}(.*)[.]{2,2}$"),
        # Length of lectures
        list(index = LectureLengthIndex,    pattern = "How.*lectures[.]{4,5}(.*)[.]{2,2}$"),
        # Rate of talks
        list(index = TalksRateIndex,        pattern = "How.*talks[.]{3,5}(.*)[.]{2,2}$"),
        # Relevance of talks
        list(index = TalksRelevanceIndex,   pattern = "Do.*work[.]{3,5}(.*)[.]{2,2}$?"),
        # Length of talks
        list(index = TalksLengthIndex,      pattern = "How.*talks[.]{3,5}(.*)[.]{2,2}$")
    )
    for (rule in rules) {
        labels[rule$index] <- str_match(labels[rule$index], rule$pattern)[, 2]
    }
    labels
})


#print (names(feedback))
#print (names(feedback))
# Open the PDF device that receives every chart printed further down.
# NOTE(review): pdf() takes width/height in inches, so 1280 x 1024 looks like
# a pixel size; with paper="a4r" the values are presumably clamped to the
# page -- confirm against ?pdf.
pdf(
    file = "results/plots/feedback_charts_2014.pdf",
    width = 1280,
    height = 1024,
    paper = "a4r",
    title = "Leipzig workshop feedback 2014"
)


# Layers for the "generic questions" pages: one bar chart of `answer` per
# `question` facet, each facet with its own axes, laid out on three rows.
generic_questions_plot <- list(
    theme_bw(),
    geom_bar(mapping = aes(x = answer)),
    facet_wrap(~ question, nrow = 3, scales = "free"),
    theme(axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 1))
)

# Layers for the per-lecture bar charts: one facet per `variable`, three rows,
# legend at the bottom, x tick labels rotated by 45 degrees.
# Alternatives tried earlier: the "RdPu" and "Greens" fill palettes, x labels
# rotated by 90 degrees via opts(), and coord_polar(theta='y').
barchart_plot <- list(
    theme_bw(),
    geom_bar(width = 1, colour = "#0072B2"),
    theme(
        legend.position = "bottom",
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
    ),
    facet_wrap(~ variable, nrow = 3),
    scale_fill_brewer(palette = "Blues"),
    scale_color_identity("black"),
    scale_x_discrete("answer")
)
# Layers intended for the pie-chart variant: the same faceted bars as the bar
# charts, without a fill palette. The polar transform is not part of this
# list; the commented-out callers add coord_polar(theta="y") themselves.
# Alternatives tried earlier: "RdPu" / "PuRd" palettes and a sequential
# brewer palette (type="seq", palette=2).
pie_plot <- list(
    theme_bw(),
    geom_bar(width = 1),
    theme(
        legend.position = "bottom",
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
    ),
    facet_wrap(~ variable, nrow = 3),
    scale_x_discrete("answer")
)


# Layers for the per-lecture violin plots: one violin per `variable`, with the
# `answer` factor converted to its numeric level codes on the y axis.
# No y scale is defined here; the plotting functions add their own
# scale_y_continuous() with the answer labels. Faceting by `variable` was
# tried earlier and is left out.
violin_plot <- list(
    theme_bw(),
    geom_violin(
        mapping = aes(x = variable, y = as.numeric(answer)),
        fill = "#0072B2",
        colour = "black"
    ),
    theme(
        legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
    ),
    scale_x_discrete("Lecture")
)
# Layers for the per-student bar charts: one facet per `Timestamp` (one per
# submitted form), bars filled by `answer`, no legend.
# Alternative fill palettes tried earlier: "RdPu", "PuRd" and a sequential
# brewer palette (type="seq", palette=5).
student_barchart_plot <- list(
    theme_bw(),
    geom_bar(mapping = aes(x = answer, fill = answer), colour = "#31A354"),
    facet_wrap(~ Timestamp, nrow = 3),
    theme(
        legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
    ),
    scale_fill_brewer(palette = "Greens"),
    scale_x_discrete("Student (Timestamp of when the feedback form was filled)")
)

# Layers for the per-student violin plots: one violin per `Timestamp`, with
# the `answer` factor converted to its numeric level codes on the y axis.
# Fill colours tried earlier: #56B4E9, #0072B2, #009E73, #018571, #238B45,
# #74C476.
student_violin_plot <- list(
    theme_bw(),
    geom_violin(
        mapping = aes(x = Timestamp, y = as.numeric(answer)),
        fill = "#31A354",
        colour = "black"
    ),
    theme(
        legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
    ),
    scale_x_discrete("Student (Timestamp of when the feedback form was filled)")
)


# Generic questions
# Build the long-format table behind one "generic questions" page.
#
# column_index: positions of the columns of the global `feedback` data frame
#   to include. It must contain the "Timestamp" column, which is used as the
#   id variable when melting.
# Returns the melted data frame, with incomplete rows dropped and two extra
# character columns: `question` (facet label) and `answer` (x-axis label),
# both line-wrapped and manually cleaned up.
.generic_questions_long <- function(column_index) {
    wide <- feedback[, column_index, drop=FALSE]
    long <- melt(wide, id.vars="Timestamp")
    long <- long[complete.cases(long), ]

    # Question labels come from the column names, where words are separated
    # by dots: break the line after every seventh word, drop a trailing line
    # break, then turn the remaining dots into spaces.
    question <- as.character(long$variable)
    question <- gsub("(\\w*\\.\\w*\\.\\w*\\.\\w*\\.\\w*\\.\\w*\\.\\w*)\\.", "\\1\n", question, perl=TRUE)
    question <- gsub("\n$", "", question, perl=TRUE)
    question <- gsub("\\.", " ", question, perl=TRUE)
    # FIX: manually fixing some labels
    question <- gsub("What did .*ratio of teaching versus scientific lectures\n", "What did you think\nof the ratio of teaching versus\nscientific lectures", question, perl=TRUE)
    question <- gsub("In general.*\n.*\n.*", "In general what did\nyou think about the length\nof the course", question, perl=TRUE)
    long$question <- question

    # Answer labels. The rules are applied in this order; later rules see the
    # output of earlier ones.
    mailing_label <- "Mailing list\n(Institute/\nDepartment/\nother)"
    answer_fixes <- list(
        # FIX: merging "Department bulletin", "mailing list" and
        # "Internal Institute mail" answers together
        c("^mailing.*", mailing_label),
        c("^I.*needed", "I needed"),
        c("^internal.*", mailing_label),
        c("^Department.*", mailing_label),
        # FIX: manually fixing some answer labels
        c("^Evoldir.*", "Evoldir\nwebsite"),
        c("^More scientific.*", "More scientific\nlectures, please"),
        c("^More teaching.*", "More teaching\non programming"),
        c("10 years", "10\nyears"),
        # Break the line after every second word, then turn dots into spaces.
        c("([\\w()']*\\s[\\w()']*)\\s", "\\1\n"),
        c("\\.", " ")
    )
    answer <- factor(long$value)
    for (fix in answer_fixes) {
        answer <- gsub(fix[[1]], fix[[2]], answer, perl=TRUE)
    }
    long$answer <- answer

    long
}

# Generic questions
#
# Draws two pages of faceted bar charts on the current graphics device, one
# for the columns in GenericQuestionIndex and one for GenericQuestion2Index.
# Both pages are printed explicitly: a ggplot object that is merely evaluated
# inside a function is never drawn, which previously dropped the first page.
# Returns the second plot invisibly, so calling this at top level does not
# draw it a second time.
generic_questions <- function() {
    plot_title <- "Generic questions"
    print(plot_title)

    page <- NULL
    for (column_index in list(GenericQuestionIndex, GenericQuestion2Index)) {
        long <- .generic_questions_long(column_index)
        page <- ggplot(data=long) + generic_questions_plot + ggtitle(plot_title)
        print(page)
    }
    invisible(page)
}

# Quality of the lecture
#png("feedback_quality_piechart.png")
#dev.new()
# Draw the "quality" pages on the current graphics device: bar charts and
# violin plots per lecture/talk, then bar charts per student.
# Reads the global `feedback` data frame and the shared layer lists
# (barchart_plot, violin_plot, student_barchart_plot).
qualitylecture <- function() {
    plot_title <- "How would you rate the quality of the following lectures?"
    print(plot_title)

    rating_columns <- c(1, LectureRateIndex, TalksRateIndex)
    ratings_long <- melt(feedback[, rating_columns, drop=FALSE], id.vars="Timestamp")
    print(summary(ratings_long))

    # Rows with missing answers are kept here (no complete.cases() filter).
    # Levels run from worst to best; "NA" is the literal string.
    plot_labels <- c("NA", "Very Poor", "Poor", "Average", "Good", "Very Good")
    ratings_long$answer <- factor(ratings_long$value, levels=plot_labels)

    bar_title <- paste(plot_title, "(Bar charts)")
    bars <- qplot(answer, data=ratings_long, fill=answer, main=bar_title) + barchart_plot
    print(bars)

    # The violin layers plot the numeric level codes, so the y axis is
    # relabelled with the level names.
    violins <- ggplot(data=ratings_long) +
        violin_plot +
        scale_y_continuous("answer", breaks=seq_along(plot_labels), labels=plot_labels) +
        ggtitle(paste(plot_title, "(Violin plots)"))
    print(violins)

    # The per-student violin plot is not drawn for this section.
    by_student <- ggplot(data=ratings_long) +
        student_barchart_plot +
        ggtitle("Distribution of answers by student (Quality of Lectures and Talks)")
    print(by_student)
}

# Relevance of the lecture
#png("feedback_relevance_piechart.png")
#dev.new()
# Draw the "relevance" pages on the current graphics device: bar charts and
# violin plots per lecture/talk, then bar charts and violin plots per student.
# Reads the global `feedback` data frame and the shared layer lists.
#
# Every plot is printed explicitly: a ggplot object that is merely evaluated
# inside a function is never drawn, which previously dropped the first three
# pages. Returns the last plot invisibly, so calling this at top level does
# not draw it a second time.
relevance <- function() {
    plot_title <- "Do you think the lecture is or will be relevant to your work?"
    print(plot_title)
    feedback.relevance <- subset(feedback, select=c(1, LectureRelevanceIndex, TalksRelevanceIndex))
    feedback.relevance.long <- melt(feedback.relevance, id.vars="Timestamp")
    feedback.relevance.long <- feedback.relevance.long[complete.cases(feedback.relevance.long),]

    # FIX: the questions on the talks had Yes/No answer options, while the
    # questions on the lectures had "Very Relevant/Probably/Probably not"
    # options.
    # Solution: merging "Yes" with "Very Relevant" answers, and "No" with
    # "Probably not". The substitutions run in this exact order; several
    # overlap, and the order is kept as is so the recoding does not change.
    answer <- feedback.relevance.long$value
    answer <- gsub("^Probably not",     "Probably not/No",   answer)
    answer <- gsub("^Not.at.all$",      "Probably not/No",   answer)
    answer <- gsub("^Not at all",       "Probably not/No",   answer)
    answer <- gsub("^Not at all.*",     "Probably not/No",   answer)
    answer <- gsub(".*Not.*",           "Probably not/No",   answer, perl=TRUE)
    answer <- gsub("Not.*",             "Probably not/No",   answer, perl=TRUE)
    answer <- gsub("^No$",              "Probably not/No",   answer)
    answer <- gsub("^Very relevant \t", "Very relevant/Yes", answer)
    answer <- gsub("^Very relevant",    "Very relevant/Yes", answer)
    answer <- gsub("^Very.*",           "Very relevant/Yes", answer)
    answer <- gsub("^Yes$",             "Very relevant/Yes", answer)

    # Levels run from "I don't know" up to "Very relevant/Yes". The
    # "Not at all" level is kept even though the substitutions above rewrite
    # every answer starting with "Not at all"; dropping it would shift the
    # numeric codes used by the violin plots.
    plot_labels <- rev(c("Very relevant/Yes", "Probably", "Probably not/No", "Not at all", "I don't know"))
    print(levels(factor(answer)))
    feedback.relevance.long$answer <- factor(answer, levels=plot_labels)

    # FIX: "I don't know" answers should not be included in the violin plots,
    # or at least they should be colored differently. Otherwise, it seems
    # that an "I don't know" answer is better or worse than the other
    # answers. For now they are still included.
    p <- qplot(data=feedback.relevance.long, x=answer, fill=answer, main=paste(plot_title, "(Bar charts)")) + barchart_plot
    print(p)
    print(summary(feedback.relevance.long))

    # The violin layers plot the numeric level codes, so the y axis is
    # relabelled with the level names.
    p <- ggplot(data=feedback.relevance.long) + violin_plot +
        scale_y_continuous("answer", breaks=seq_along(plot_labels), labels=plot_labels) +
        ggtitle(paste(plot_title, "(Violin plots)"))
    print(p)

    p <- ggplot(data=feedback.relevance.long) + student_barchart_plot +
        ggtitle("Distribution of answers by student (Relevancy of Lectures and Talks)")
    print(p)

    p <- ggplot(data=feedback.relevance.long) + student_violin_plot +
        scale_y_continuous("answer", breaks=seq_along(plot_labels), labels=plot_labels) +
        ggtitle("Distribution of answers by student (Relevancy of Lectures and Talks)")
    print(p)

    invisible(p)
}

#dev.off()

# Length
#png("feedback_length_piechart.png")
#dev.new()
# Draw the "length" pages on the current graphics device: bar charts and
# violin plots per lecture/talk, then bar charts and violin plots per student.
# Reads the global `feedback` data frame and the shared layer lists.
#
# Every plot is printed explicitly: a ggplot object that is merely evaluated
# inside a function is never drawn, which previously dropped the first three
# pages. Returns the last plot invisibly, so calling this at top level does
# not draw it a second time.
lecture_length <- function() {
    plot_title <- "How would you rate the length of the following lectures?"
    print(plot_title)
    feedback.length <- subset(feedback, select=c(1, LectureLengthIndex, TalksLengthIndex))
    feedback.length.long <- melt(feedback.length, id.vars="Timestamp")
    feedback.length.long <- feedback.length.long[complete.cases(feedback.length.long),]

    # Levels run from "Too short" up to "Too long"; answers outside this set
    # become NA.
    plot_labels <- rev(c("Too long", "Just right", "Too short"))
    feedback.length.long$answer <- factor(feedback.length.long$value, levels=plot_labels)

    p <- qplot(data=feedback.length.long, fill=answer, x=answer, main=paste(plot_title, "(Bar charts)")) + barchart_plot
    print(p)

    # The violin layers plot the numeric level codes, so the y axis is
    # relabelled with the level names.
    p <- ggplot(data=feedback.length.long) + violin_plot +
        scale_y_continuous("answer", breaks=seq_along(plot_labels), labels=plot_labels) +
        ggtitle(paste(plot_title, "(Violin plots)"))
    print(p)

    p <- ggplot(data=feedback.length.long) + student_barchart_plot +
        ggtitle("Distribution of answers by student (Length of Lectures and Talks)")
    print(p)

    p <- ggplot(data=feedback.length.long) + student_violin_plot +
        scale_y_continuous("answer", breaks=seq_along(plot_labels), labels=plot_labels) +
        ggtitle("Distribution of answers by student (Length of Lectures and Talks)")
    print(p)

    invisible(p)
}


# Render the report sections. Only the lecture-quality section is enabled;
# the calls for the other sections are left commented out.
#generic_questions()
qualitylecture()
#relevance()
#lecture_length()

# Progress marker, printed once the enabled sections have run.
print("ok")

# Close the graphics device opened by pdf() earlier in the script.
dev.off()