Source

DocIRHadoop / InitHadoop / setupHadoop.py

#!/usr/bin/env python

import os
import subprocess

class Hadoop:
	"""Thin wrapper around the Hadoop command-line scripts.

	The installation directory is read from the HADOOP_HOME environment
	variable when it is set and non-empty, and can be overridden with
	setHadoopLoc().  When a location is known, scripts are launched from
	``<hadoop_dir>/bin``, so the methods work from any working directory.
	Without a location they fall back to the relative ``bin/...`` path,
	which is resolved against the current working directory.
	"""

	def __init__(self):
		"""Pick up the Hadoop location from HADOOP_HOME, if available."""
		home = os.environ.get("HADOOP_HOME")
		# An empty HADOOP_HOME is treated as unset; a real value is
		# normalized the same way setHadoopLoc() normalizes its argument.
		self.hadoop_dir = os.path.abspath(home) if home else None

	def _script(self, name):
		"""Return the path used to launch the script *name* from bin/."""
		if self.hadoop_dir is None:
			# No known location: keep the relative path so the script is
			# looked up under the current working directory.
			return "bin/" + name
		return os.path.join(self.hadoop_dir, "bin", name)

	def _dfs(self, *args):
		"""Return the argument list for ``hadoop dfs <args>``."""
		return [self._script("hadoop"), "dfs"] + list(args)

	def isHadoop(self):
		"""Return True if a Hadoop location is known, False otherwise."""
		return self.hadoop_dir is not None

	def setHadoopLoc(self, path):
		"""Set the Hadoop location to the absolute form of *path*."""
		self.hadoop_dir = os.path.abspath(path)

	def goHadoop(self):
		"""Change the working directory of this process to the Hadoop location.

		os.chdir() raises if no location has been set or if the directory
		cannot be entered.
		"""
		os.chdir(self.hadoop_dir)

	def startHadoop(self):
		"""Start Hadoop by running start-all.sh.

		The exit status of the script is not checked.
		"""
		subprocess.call([self._script("start-all.sh")])

	def stopHadoop(self):
		"""Stop Hadoop by running stop-all.sh.

		Raises subprocess.CalledProcessError if the script exits with a
		non-zero status.
		"""
		subprocess.check_call([self._script("stop-all.sh")])

	def lsHadoop(self):
		"""List HDFS with ``hadoop dfs -ls``.

		Raises subprocess.CalledProcessError if the command exits with a
		non-zero status.
		"""
		subprocess.check_call(self._dfs("-ls"))

	def putFileHadoop(self, source, dest):
		"""Put file(s) on HDFS.

		*source* is a local path and is made absolute before it is passed
		on; *dest* is passed through unchanged.  The exit status of the
		command is not checked.
		"""
		subprocess.call(self._dfs("-put", os.path.abspath(source), dest))

	def getFileHadoop(self, source, dest):
		"""Get file(s) from HDFS.

		*source* is passed through unchanged; *dest* is a local path and is
		made absolute before it is passed on.  The exit status of the
		command is not checked.
		"""
		subprocess.call(self._dfs("-get", source, os.path.abspath(dest)))

	def delFileHadoop(self, path):
		"""Delete a file from HDFS with ``hadoop dfs -rm``.

		The exit status of the command is not checked.
		"""
		subprocess.call(self._dfs("-rm", path))

	def delFolderHadoop(self, path):
		"""Delete a folder from HDFS recursively with ``hadoop dfs -rmr``.

		The exit status of the command is not checked.
		"""
		subprocess.call(self._dfs("-rmr", path))

def test():
	"""Interactive smoke test for the Hadoop wrapper.

	Moves into the Hadoop directory (defaulting to /usr/local/hadoop when
	no location is known), starts Hadoop, lists HDFS, asks for a path and
	deletes it recursively, then stops Hadoop.  Hadoop is stopped even if
	the listing or the delete step raises.
	"""
	# raw_input only exists on Python 2; Python 3 renamed it to input.
	try:
		ask = raw_input
	except NameError:
		ask = input

	h = Hadoop()

	# The single-argument parenthesized form prints the same text on
	# Python 2 and Python 3.
	print(os.path.abspath(os.curdir))
	if not h.isHadoop():
		h.setHadoopLoc("/usr/local/hadoop")
	h.goHadoop()
	print(">>> " + os.path.abspath(os.curdir))

	h.startHadoop()
	try:
		h.lsHadoop()

		p = ask("File to Delete: ")
		# Skip the recursive delete when nothing was entered.
		if p:
			h.delFolderHadoop(p)
	finally:
		# Do not leave Hadoop running when a step above fails.
		h.stopHadoop()

# Run the interactive test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
	test()