Commits

Chris Mungall committed 0f0867a Merge

merged in latest galaxy-dist

Comments (0)

Files changed (1393)

 
 # Database stuff
 database/beaker_sessions
+database/community_files
 database/compiled_templates
 database/files
 database/pbs
 
 HOW TO START
 ============
-Galaxy requires Python 2.4, 2.5 or 2.6. To check your python version, run:
+Galaxy requires Python 2.5, 2.6 or 2.7. To check your python version, run:
 
 % python -V
 Python 2.4.4
 
-Before starting Galaxy for the first time, please run the setup script:
-
-% sh setup.sh
-
-If setup.sh finishes successfully, you can then proceed to starting Galaxy:
+Start Galaxy:
 
 % sh run.sh
 
 Not all dependencies are included for the tools provided in the sample
 tool_conf.xml.  A full list of external dependencies is available at:
 
-http://bitbucket.org/galaxy/galaxy-central/wiki/ToolDependencies
+http://wiki.g2.bx.psu.edu/Admin/Tools/Tool%20Dependencies

buildbot_setup.sh

 
 LINKS="
 /galaxy/data/location/add_scores.loc
+/galaxy/data/location/all_fasta.loc
 /galaxy/data/location/alignseq.loc
 /galaxy/data/annotation_profiler
 /galaxy/data/annotation_profiler/annotation_profiler.loc
 /galaxy/data/location/bowtie_indices_color.loc
 /galaxy/data/location/bwa_index.loc
 /galaxy/data/location/bwa_index_color.loc
+/galaxy/data/location/ccat_configurations.loc
 /galaxy/data/location/codingSnps.loc
 /galaxy/data/location/encode_datasets.loc
 /galaxy/home/universe/encode_feature_partitions
 /galaxy/data/location/liftOver.loc
 /galaxy/data/location/maf_index.loc
 /galaxy/data/location/maf_pairwise.loc
-/galaxy/data/location/microbes/microbial_data.loc
+/galaxy/data/location/microbial_data.loc
+/galaxy/data/location/mosaik_index.loc
 /galaxy/data/location/perm_base_index.loc
 /galaxy/data/location/perm_color_index.loc
 /galaxy/data/location/phastOdds.loc
+/galaxy/data/location/picard_index.loc
 /galaxy/data/location/quality_scores.loc
 /galaxy/data/location/regions.loc
 /galaxy/data/location/sam_fa_indices.loc
 /galaxy/data/location/srma_index.loc
 /galaxy/data/taxonomy
 /galaxy/data/location/twobit.loc
+/galaxy/software/tool-data/gatk
 "
 
 SAMPLES="
 universe_wsgi.ini.sample
 tool_data_table_conf.xml.sample
 tool-data/shared/ucsc/builds.txt.sample
+migrated_tools_conf.xml.sample
 "
 
 DIRS="
     fi
 done
 
-# for wig_to_bigWig
-if [ ! -f "tool-data/shared/ucsc/chrom/hg17.len" -a -f "test-data/wig_to_bigwig_hg17.len" ]; then
-    mkdir -p tool-data/shared/ucsc/chrom
-    cp test-data/wig_to_bigwig_hg17.len tool-data/shared/ucsc/chrom/hg17.len
+# for wig_to_bigWig and bed_to_bigBed
+# Copy any chromosome-length files shipped with the test data into the
+# location the tools read them from.
+for build in hg17 hg18; do
+    if [ -f "test-data/chrom/$build.len" ]; then
+        echo "Copying test-data/chrom/$build.len to tool-data/shared/ucsc/chrom/"
+        mkdir -p tool-data/shared/ucsc/chrom
+        cp test-data/chrom/$build.len tool-data/shared/ucsc/chrom/$build.len
+    fi
+done
+
+# Keep a local clone of the shared test data repository: update it in place
+# when it already exists, otherwise clone it fresh.
+if [ -d "test-data-repo" ]; then
+    echo "Updating test data repository"
+    cd test-data-repo
+    hg pull
+    hg update
+    cd ..
+else
+    echo "Cloning test data repository"
+    hg clone http://bitbucket.org/natefoo/galaxy-test-data/ test-data-repo
 fi
+# Generate the .loc (tool data location) files from the test data repo.
+echo "Setting up test data location files"
+python test-data-repo/location/make_location.py
+
+# NOTE(review): '>>' appends on every invocation — repeated runs will
+# duplicate the buildbot entries in builds.txt; confirm the file is reset
+# (e.g. re-copied from the .sample) before this script runs.
+echo "Appending tool-data/shared/ucsc/builds.txt.buildbot to tool-data/shared/ucsc/builds.txt"
+cat tool-data/shared/ucsc/builds.txt.buildbot >> tool-data/shared/ucsc/builds.txt
 
 python ./scripts/fetch_eggs.py all

community_datatypes_conf.xml.sample

-<?xml version="1.0"?>
-<datatypes>
-    <registration>
-        <datatype extension="tool" type="galaxy.webapps.community.datatypes:Tool" model="galaxy.webapps.community.model:Tool"/>
-        <datatype extension="toolsuite" type="galaxy.webapps.community.datatypes:ToolSuite" model="galaxy.webapps.community.model:Tool"/>
-    </registration>
-</datatypes>

community_wsgi.ini.sample

 # Temporary storage for additional datasets, this should be shared through the cluster
 new_file_path = database/tmp
 
-# Where templates are stored
-template_path = lib/galaxy/webapps/community/templates
-
 # Session support (beaker)
 use_beaker_session = True
 session_type = memory
 # path to sendmail
 sendmail_path = /usr/sbin/sendmail
 
+# For use by email messages sent from the tool shed
+#smtp_server = smtp.your_tool_shed_server
+#email_from = your_tool_shed_email@server
+
+# The URL linked by the "Support" link in the "Help" menu.
+#support_url = http://wiki.g2.bx.psu.edu/Support
+
 # Write thread status periodically to 'heartbeat.log' (careful, uses disk space rapidly!)
 ## use_heartbeat = True
 
     paths, and configure for start at boot with `update-rc.d galaxy defaults`.
     Also written and submitted by James Casbon.
 
+galaxy.fedora-init:
+
+    init script for Fedora/RedHat/Scientific Linux/CentOS.  Copy to
+    /etc/init.d/galaxy, modify paths, and configure for start at boot with
+    `chkconfig galaxy on`.  Written and submitted by Brad Chapman.
+
 galaxy.solaris-smf.xml:
 
     SMF Manifest for Solaris 10 and OpenSolaris.  Import with `svccfg import
     galaxy.solaris-smf.xml`.
+
+gls.pl:
+
+    "Galaxy ls": for sites where Galaxy logins match system logins, this
+    script can be used to list the filesystem paths to a user's history
+    datasets.  Requires site modifications.  Written and submitted by Simon
+    McGowan.

contrib/collect_sge_job_timings.sh

+#!/bin/sh
+
+##
+## CHANGE ME to galaxy's database name
+## 
+DATABASE=galaxyprod
+
+##
+## AWK script to extract the relevant fields of SGE's qacct report
+##   and write them all in one line.
+AWKSCRIPT='
+$1=="jobnumber" { job_number = $2 }
+$1=="qsub_time" { qsub_time = $2 }
+$1=="start_time" { start_time = $2 }
+$1=="end_time" { end_time = $2 
+        print job_number, qsub_time, start_time, end_time
+}
+'
+
+FIFO=$(mktemp -u) || exit 1
+mkfifo "$FIFO" || exit 1
+
+##
+## Write the SGE/QACCT job report into a pipe
+## (later will be loaded into a temporary table)
+qacct -j |
+    egrep "jobnumber|qsub_time|start_time|end_time" |
+    sed 's/  */\t/'  |
+    awk -v FS="\t" -v OFS="\t" "$AWKSCRIPT" |
+    grep -v -- "-/-" > "$FIFO" &
+
+##
+##  The SQL to generate the report
+##
+SQL="
+--
+-- Temporary table which contains the qsub/start/end times, based on SGE's qacct report.
+--
+CREATE TEMPORARY TABLE sge_times (
+  sge_job_id INTEGER PRIMARY KEY,
+  qsub_time TIMESTAMP WITHOUT TIME ZONE,
+  start_time TIMESTAMP WITHOUT TIME ZONE,
+  end_time TIMESTAMP WITHOUT TIME ZONE
+);
+
+COPY sge_times FROM '$FIFO' ;
+
+--
+-- Temporary table which contains a unified view of all galaxy jobs.
+-- for each job:
+--   the user name, total input size (bytes), and input file types, DBKEY
+--   creation time, update time, SGE job runner parameters
+-- If a job had more than one input file, then some parameters might not be accurate (e.g. DBKEY)
+-- as one will be chosen arbitrarily
+CREATE TEMPORARY TABLE job_input_sizes AS
+SELECT
+ job.job_runner_external_id as job_runner_external_id,
+ min(job.id) as job_id,
+ min(job.create_time) as job_create_time,
+ min(job.update_time) as job_update_time,
+ min(galaxy_user.email) as email,
+ min(job.tool_id) as tool_name,
+-- This hack requires a user-custom aggregate function, comment it out for now
+-- textcat_all(hda.extension || ' ') as file_types,
+ sum(dataset.file_size) as total_input_size,
+ count(dataset.file_size) as input_dataset_count,
+ min(job.job_runner_name) as job_runner_name,
+-- This hack tries to extract the DBKEY attribute from the metadata JSON string
+ min(substring(encode(metadata,'escape') from '\"dbkey\": \\\\[\"(.*?)\"\\\\]')) as dbkey
+FROM
+ job,
+ galaxy_user,
+ job_to_input_dataset,
+ history_dataset_association hda,
+ dataset
+WHERE
+ job.user_id = galaxy_user.id
+ AND
+ job.id = job_to_input_dataset.job_id
+ AND
+ hda.id = job_to_input_dataset.dataset_id
+ AND
+ dataset.id = hda.dataset_id
+ AND
+ job.job_runner_external_id is not NULL
+GROUP BY
+ job.job_runner_external_id;
+
+
+--
+-- Join the two temporary tables, create a nice report
+--
+SELECT
+ job_input_sizes.job_runner_external_id as sge_job_id,
+ job_input_sizes.job_id as galaxy_job_id,
+ job_input_sizes.email,
+ job_input_sizes.tool_name,
+-- ## SEE previous query for commented-out filetypes field
+-- job_input_sizes.file_types,
+ job_input_sizes.job_runner_name as sge_params,
+ job_input_sizes.dbkey,
+ job_input_sizes.total_input_size,
+ job_input_sizes.input_dataset_count,
+ job_input_sizes.job_update_time - job_input_sizes.job_create_time as galaxy_total_time,
+ sge_times.end_time - sge_times.qsub_time as sge_total_time,
+ sge_times.start_time - sge_times.qsub_time as sge_waiting_time,
+ sge_times.end_time - sge_times.start_time as sge_running_time,
+ job_input_sizes.job_create_time as galaxy_job_create_time
+-- ## no need to show the exact times, the deltas (above) are informative enough
+-- job_input_sizes.job_update_time as galaxy_job_update_time,
+-- sge_times.qsub_time as sge_qsub_time,
+-- sge_times.start_time as sge_start_time,
+-- sge_times.end_time as sge_end_time
+FROM
+ job_input_sizes
+LEFT OUTER JOIN
+ SGE_TIMES
+ON (job_input_sizes.job_runner_external_id = sge_times.sge_job_id)
+ORDER BY
+ galaxy_job_create_time
+ 
+"
+
+echo "$SQL" | psql --pset "footer=off" -F"  " -A --quiet "$DATABASE"
+
+

contrib/galaxy.fedora-init

+#!/bin/bash
+#
+# Init file for Galaxy (http://galaxyproject.org/)
+#   Suitable for use on Fedora and derivatives (RedHat Enterprise Linux, Scientific Linux, CentOS)
+#
+# Contributed by Brad Chapman
+#
+# chkconfig: 2345 98 20
+# description: Galaxy http://galaxyproject.org/
+
+#--- config
+
+SERVICE_NAME="galaxy"
+RUN_AS="galaxy"
+RUN_IN="/path/to/galaxy-dist"
+
+#--- main actions
+
+# Start Galaxy as a daemon: runs 'sh run.sh --daemon' from $RUN_IN.
+# When invoked as the service user, runs in place; when invoked as root,
+# drops privileges via su; any other user is refused.
+start() {
+	echo "Starting $SERVICE_NAME... "
+	# Build the command as a string so the same text can be handed either
+	# to eval (already $RUN_AS) or to 'su -c' (running as root).
+	cmd="cd $RUN_IN && sh run.sh --daemon"
+	case "$(id -un)" in
+		$RUN_AS)
+			# Already the service user: run directly.
+			eval "$cmd"
+			;;
+		root)
+			# Switch to the service user's login environment first.
+			su - $RUN_AS -c "$cmd"
+			;;
+		*)
+			echo "*** ERROR *** must be $RUN_AS or root in order to control this service" >&2
+			exit 1
+	esac
+	echo "...done."
+}
+
+stop() {
+	echo -n "Stopping $SERVICE_NAME... "