Source

twools / twitter-pull / finish-corpora

Full commit
#!/bin/sh

# Finish creating extra symlink directories and setting permissions properly
# after changes in the original files (e.g. adding new ones).

# Verify that a path names an existing directory; abort the script otherwise.
#
# Arguments:
#   $1 - path to check
#   $2 - human-readable description of the path, used in the error message
# Outputs:
#   An "ERROR: ..." line on stderr when the check fails.
# Exits:
#   With status 1 when $1 is empty, consists only of slashes, does not exist,
#   or exists but is not a directory.  Returns 0 otherwise.
check_dir_exists() {
  # A usable path must contain at least one character other than '/'.  This
  # rejects the empty string and every spelling of the root directory
  # ("/", "//", "///", ...) -- presumably a guard against an unset or
  # mis-set environment variable.
  case $1 in
    *[!/]*) ;;
    *)
      echo "ERROR: $2 '$1' not found" >&2
      exit 1
      ;;
  esac
  if [ ! -e "$1" ]; then
    echo "ERROR: $2 '$1' not found" >&2
    exit 1
  fi
  if [ ! -d "$1" ]; then
    echo "ERROR: $2 '$1' exists but is not a directory" >&2
    exit 1
  fi
}

# Root of the corpus tree, taken from the TWITTER_PULL_DIR environment variable.
cordir=$TWITTER_PULL_DIR
# Name of the subdirectory (relative to $cordir) that holds the original files.
origsrel=originals

# Download machines that have their own subdirectory under $origsrel.
networks="longhorn markov"
# Machine selectors for the symlink directories: each network plus "all".
machs="$networks all"
# Twitter source selectors for the symlink directories.
types="geotagged spritzer all"

check_dir_exists "$TWITTER_PULL_DIR" "twitter-pull storage directory"
check_dir_exists "$TWITTER_PULL_DIR/$origsrel" "twitter-pull original storage subdirectory"

# Every network must have its own directory of originals.
for network in $networks; do
  check_dir_exists "$TWITTER_PULL_DIR/$origsrel/$network" "twitter-pull storage directory for network $network"
done

cd "$cordir" || {
  echo "ERROR: unable to change to directory '$cordir'" >&2
  exit 1
}
# From here on refer to the corpus root by absolute path, so that later uses
# of $cordir keep working even after the working directory has changed and
# even if TWITTER_PULL_DIR was given as a relative path.
cordir=$(pwd) || {
  echo "ERROR: unable to determine current directory" >&2
  exit 1
}

# Remove a directory of symlinks, refusing to touch it if it holds real data.
#
# Arguments:
#   $1 - directory to remove
# Globals:
#   dir, file - overwritten (plain POSIX sh has no function-local variables)
# Outputs:
#   A progress line on stdout; an error line on stderr when removal is refused.
# Exits:
#   With status 1 when $1 exists but is not a directory, or when it contains
#   any entry (hidden ones included) that is not a symbolic link.
# Returns:
#   0 when the directory was removed or did not exist.
cleandir() {
  dir="$1"
  if [ -e "$dir" ]; then
    if [ ! -d "$dir" ]; then
      echo "$dir exists but is not a directory!  Can't proceed." >&2
      exit 1
    fi

    # The three patterns together cover every entry except "." and "..",
    # so hidden data files cannot slip past the check and get deleted.
    for file in "$dir"/* "$dir"/.[!.]* "$dir"/..?*; do
      # A pattern that matched nothing is left as literal text; skip it.
      # (-L keeps dangling symlinks, for which -e is false, in the loop.)
      [ -e "$file" ] || [ -L "$file" ] || continue
      if [ ! -L "$file" ]; then
        echo "Non-symlink $file found in directory to be removed!  Can't proceed." >&2
        exit 1
      fi
    done

    echo "Directory $dir has no data files, removing ..."
    rm -rf -- "$dir"
  else
    echo "Directory $dir doesn't currently exist."
  fi
  return 0
}

# Clear out the symlink directories left over from a previous run.  There is
# one per (machine, source type) pair, named "<machine>-<type>"; cleandir
# aborts the script if any of them turns out to hold real data files.
for mach in $machs; do
  for type in $types; do
    cleandir "$mach-$type"
  done
done

# Create symlinks.
#
# For every (machine, source type) pair build a directory "<machine>-<type>"
# under the current directory (the corpus root) and fill it with relative
# symlinks to the matching .bz2 files under $origsrel/<srcdir>/.  Each link
# is named "<srcdir>-<original basename>".
for mach in $machs; do
  for type in $types; do

    # Which per-machine source directories feed this symlink directory.
    case $mach in
      longhorn ) srcdirs="longhorn" ;;
      markov ) srcdirs="markov" ;;
      all ) srcdirs="longhorn markov" ;;
      * ) echo "Unrecognized download machine '$mach'"; exit 1 ;;
    esac

    # Which filename prefixes belong to this source type.
    case $type in
      geotagged ) prefixes="global" ;;
      spritzer ) prefixes="spritzer" ;;
      all ) prefixes="global spritzer" ;;
      * ) echo "Unrecognized Twitter source '$type'"; exit 1 ;;
    esac

    # Assign the name before announcing it, so the message shows the
    # directory actually being created.
    dir="$mach-$type"
    echo "Creating directory $dir of symlinks ..."
    mkdir "$dir" || {
      echo "ERROR: unable to create directory '$dir'" >&2
      exit 1
    }

    # Links are created from the corpus root rather than by changing into
    # $dir, so a failed directory change can never scatter links elsewhere.
    for srcdir in $srcdirs; do
      for prefix in $prefixes; do
        for file in "$origsrel/$srcdir/$prefix"*.bz2; do
          # A pattern that matched nothing is left as literal text; skip it
          # instead of creating a bogus link.
          [ -e "$file" ] || [ -L "$file" ] || continue
          base=${file##*/}
          # The target is resolved relative to the link's own directory,
          # hence the leading "../".
          ln -s "../$file" "$dir/$srcdir-$base" || {
            echo "ERROR: unable to create symlink '$dir/$srcdir-$base'" >&2
            exit 1
          }
        done
      done
    done
  done
done

# Set permissions: group readable and executable but not writable, other no access
echo "Setting permissions ..."
# Capital X grants execute/search only on directories and on files that
# already have an execute bit set.
chmod -R g+rX,g-w,o-rwx "$cordir"
# Additionally strip write permission for everyone (owner included) from
# everything inside each network's directory of originals.
for network in $networks; do
  chmod -R a-w "$cordir/$origsrel/$network"/*
done

# All done.
echo "Done."