Source

twools / twitter-pull / finish-corpora

#!/bin/sh

# Finish creating extra symlink directories and setting permissions properly
# after changes in the original files (e.g. adding new ones).

# Verify that a required directory exists; abort the whole script otherwise.
#
# Arguments:
#   $1 - path to check
#   $2 - human-readable description of the path, used in the error message
# Outputs:
#   An "ERROR: ..." line on stderr when the check fails.
# Exits:
#   With status 1 when $1 is empty, consists only of slashes (i.e. names the
#   filesystem root), does not exist, or exists but is not a directory.
check_dir_exists() {
  case "$1" in
    *[!/]*)
      # Contains at least one non-slash character: a candidate path.
      ;;
    *)
      # Empty, or any number of slashes.  The root directory is never
      # accepted, however many slashes are used to spell it.
      echo "ERROR: $2 '$1' not found" >&2
      exit 1
      ;;
  esac
  if [ ! -e "$1" ]; then
    echo "ERROR: $2 '$1' not found" >&2
    exit 1
  fi
  if [ ! -d "$1" ]; then
    echo "ERROR: $2 '$1' exists but is not a directory" >&2
    exit 1
  fi
}

# Root of the corpus storage tree, taken from the environment.
cordir=$TWITTER_PULL_DIR
# Subdirectory of $cordir that holds the original files.
origsrel=originals

# Per-source subdirectories expected under $cordir/$origsrel.
networks="longhorn markov"
# Every source, plus the combined pseudo-source "all".
machs="$networks all"
# Kinds of symlink directory built for each entry of $machs.
types="geotagged spritzer all"

check_dir_exists "$cordir" "twitter-pull storage directory"
check_dir_exists "$cordir/$origsrel" "twitter-pull original storage subdirectory"

for network in $networks; do
  # Check the directory of the network being iterated over.  (This used to
  # test an unset variable, which silently re-checked $origsrel itself.)
  check_dir_exists "$cordir/$origsrel/$network" "twitter-pull storage directory for network $network"
done

# Everything below uses paths relative to the storage directory, so failing
# to enter it must stop the script.
cd "$cordir" || {
  echo "ERROR: cannot change to twitter-pull storage directory '$cordir'" >&2
  exit 1
}
# Make $cordir absolute so that later uses of it keep working even when
# TWITTER_PULL_DIR was given relative to the original working directory.
cordir=$(pwd)

# Remove a directory of symlinks so that it can be rebuilt from scratch.
#
# Arguments:
#   $1 - path of the directory to remove
# Globals:
#   dir, file (written; not declared local, since `local` is not POSIX sh)
# Outputs:
#   A progress line on stdout; an error line on stderr when refusing.
# Returns:
#   0 when the directory was removed or did not exist.
# Exits:
#   With status 1, leaving everything in place, when $1 is not a directory
#   or contains any entry (hidden ones included) that is not a symlink.
cleandir() {
  dir="$1"
  if [ ! -e "$dir" ]; then
    echo "Directory $dir doesn't currently exist."
    return 0
  fi
  if [ ! -d "$dir" ]; then
    echo "Non-directory $dir found where a directory of symlinks was expected!  Can't proceed." >&2
    exit 1
  fi

  # The three patterns together cover every entry except "." and "..",
  # so hidden data files are detected as well.
  for file in "$dir"/* "$dir"/.[!.]* "$dir"/..?*; do
    # Symlinks, dangling ones included, are safe to delete.
    if [ -L "$file" ]; then
      continue
    fi
    # A pattern that matched nothing is left as literal text and names no
    # existing file; anything that does exist here is real data.
    if [ -e "$file" ]; then
      echo "Non-symlink $file found in directory to be removed!  Can't proceed." >&2
      exit 1
    fi
  done

  echo "Directory $dir has no data files, removing ..."
  rm -rf -- "$dir"
  return 0
}

# Clear out every previously generated "<machine>-<type>" symlink directory.
# cleandir aborts the script if one of them contains anything other than
# symlinks, so no data file is deleted here.
for mach in $machs; do
  for type in $types; do
    cleandir "$mach-$type"
  done
done

# Create symlinks
#
# For every machine/type combination, build a directory "<machine>-<type>"
# in the current directory (the storage directory) and fill it with relative
# symlinks named "<source>-<file>" pointing at the matching .bz2 files under
# $origsrel/<source>/.
for mach in $machs; do
  for type in $types; do

    # Which source directories feed this machine's symlink directory.
    case $mach in
      longhorn ) srcdirs="longhorn" ;;
      markov ) srcdirs="markov" ;;
      all ) srcdirs="longhorn markov" ;;
      * ) echo "Unrecognized download machine '$mach'"; exit 1 ;;
    esac

    # Which file-name prefixes belong to this type.
    case $type in
      geotagged ) prefixes="global" ;;
      spritzer ) prefixes="spritzer" ;;
      all ) prefixes="global spritzer" ;;
      * ) echo "Unrecognized Twitter source '$type'"; exit 1 ;;
    esac

    # Set $dir before announcing it, so the message names the directory
    # actually being created.
    dir="$mach-$type"
    echo "Creating directory $dir of symlinks ..."
    # Without the directory the links below would have nowhere to go.
    mkdir "$dir" || exit 1

    for srcdir in $srcdirs; do
      for prefix in $prefixes; do
        # The glob is expanded from the storage directory; no cd is needed,
        # so a failed directory change can never misplace links.
        for file in "$origsrel/$srcdir/$prefix"*.bz2; do
          # An unmatched pattern is left as literal text; skip it instead of
          # creating a bogus link called "...*.bz2".
          [ -e "$file" ] || [ -L "$file" ] || continue
          base=${file##*/}
          # The link lives one level below the storage directory, hence the
          # leading "../" in its target.
          ln -s "../$file" "$dir/$srcdir-$base"
        done
      done
    done
  done
done

# Set permissions
echo "Setting permissions ..."
# Group and others: may read everything and traverse directories, but not
# write.  Paths are quoted so a storage directory whose name contains
# whitespace or glob characters is handled correctly.
chmod -R go+rX,go-w "$cordir"
for network in $networks; do
  # Write-protect the contents of each network's originals directory for
  # everyone.  The glob is kept outside the quotes so it still expands; the
  # directory itself is not matched by it and keeps its own permissions.
  chmod -R a-w "$cordir/$origsrel/$network"/*
done

# All done.
echo "Done."
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.