Commits

beroe  committed 892ef3e

Updated vars_retrieve_concept to retrieve associations and concept hierarchies (with -a and -k)

  • Participants
  • Parent commits 4cd21df

Comments (0)

Files changed (1)

File database/vars_retrieve_concept.py

  -d : get dive summary
  -s : get samples collected during dive
  -c : get all annotations of concepts for all dives
+ -a : get all associations between two or more groups, separated by /
+ -k : get all species for a higher taxon (takes only one name)
+       secret option: put 1 at the end for quoted csv
  
+  
 Usage: 
 	{0} -s D422 R500
 	{0} -d D422,v360 
 	{0} -c Aulacoctena,"Bathyctena chuni"
+	{0} -d v{{2770..2880}}    # get a dive summary for all dives between those two numbers
+	{0} -k Narcomed, Scyphozo # All species for both Narcos and Scyphos 
+	{0} -a Narcome, Scyphoz + Amphipod # Associations between Narcos and Amphipods
 	
-version 1.1: Parses the V2132 format. Can also take dive number w/o vehicle
+version 1.4: Added extraction of species from higher taxon
+version 1.3: Handle sample numbers in V3875-D5 format
 version 1.2: Flag to perform different queries at run time
+version 1.1: Parses the V2132 format. Can also take dive number w/o vehicle
   [haddock at MBARI dot org]
 Requires: pymssql module, freetds, and unixODBC (I think)
 
 # to run it on pismo, you have to use the funky path to python so
 # it will use v2.7 instead of 2.5
 
-import pymssql
 import time
 import sys
+import pymssql
+
 
 
 def parsedivenumbers(Dive,shortname=False):
+	Dive = Dive.strip()
 	DiveDict = dict(zip(list("TVRD"),["Tiburon","Ventana","Doc Ricketts","Doc Ricketts"]))
 	if shortname:
 		DiveDict = dict(zip(list("TVRD"),["tibr","vnta","docr","docr"]))
+	if "-" in Dive:
+		DFields = Dive.split("-")
+		Dive = DFields[0]
+		Sample = DFields[1]
+	else:
+		Sample = ""
 	if Dive[0].isdigit():
 		Veh= "%"
 		Num = Dive.strip()
 	else:
-		Veh = DiveDict[Dive[0]]
+		Veh = DiveDict[Dive[0].upper()]
 		Num = Dive[1:].strip()
-	return Veh,Num
+	return Veh,Num,Sample
 
 def getsampleinfo(SampleID):
 	"""Incomplete?"""
 
 	execquery(DQuery, mydatabase="EXPD")
 
+def getconceptfromKB(higher_taxa,style=1):
+	""" Retrieve all subordinate taxonomic concepts from the knowledgebase
+ Style 1 (default) is quoted and comma delimited. 
+ Style 0 is a list with one concept per line
+ """
+	ConString = """ parent_name LIKE '%{}%' """
+	Splits = higher_taxa.split(",")
+	
+	if len(Splits) > 1:
+		ConList = Splits
+	else:
+		ConList = [higher_taxa]
+	SearchNames = ""
+	outstr=""
+	NumCons = 0
+	delimit=["\n",", "][style] # zero gets newline
+	for Con in ConList:
+		Constraint = ConString.format(Con.strip())
+		# print "CONSTRAINT: ", Constraint
+	
+# Using knowledgebase (VARS_KB), select all the species for a group
+		Cquery = """
+		WITH org_name AS (
+	        SELECT DISTINCT
+	            parent.id AS parent_id, parentname.ConceptName as parent_name,
+	            child.id AS child_id, childname.ConceptName as child_name
+	        FROM
+	            Concept parent RIGHT OUTER JOIN 
+	            Concept child ON child.ParentConceptID_FK = parent.id LEFT OUTER JOIN
+	            ConceptName childname ON childname.ConceptID_FK = child.id LEFT OUTER JOIN
+	            ConceptName parentname ON parentname.ConceptID_FK = parent.id
+	        WHERE
+	            childname.NameType = 'Primary' AND
+	            parentname.NameType = 'Primary' ), 
+	jn AS (   SELECT            parent_id, parent_name, child_id, child_name FROM org_name 
+			WHERE ({}) 
+			UNION ALL SELECT C.parent_id, C.parent_name, C.child_id, C.child_name FROM jn AS p 
+			JOIN org_name AS C ON C.parent_id = p.child_id ) 
+	SELECT DISTINCT jn.parent_id, jn.parent_name, jn.child_id, jn.child_name 
+	FROM jn ORDER BY 1;
+	""".format(Constraint)
+
+		NumRecs, SpeciesList = execquery(Cquery, mydatabase="VARS_KB")
+		SearchNames += " and ".join(Con)
+		NumCons += NumRecs
+		sys.stderr.write("## Found %d concepts for query of %s\n"%(NumRecs,Con))
+		try:
+			# Get the full name of the query to include it too, e.g. Chrysaora and not just speciesprint
+			if style:
+				FirstName = "'{}'".format(SpeciesList.split("\t")[1])
+				SpNames = [ "'{}'".format(F.split("\t")[3]) for F in SpeciesList.rstrip().split("\n")]
+			else:
+				FirstName = SpeciesList.split("\t")[1]
+				SpNames = [ F.split("\t")[3] for F in SpeciesList.rstrip().split("\n")]
+		except IndexError:
+			# sys.stderr.write("No lower concepts found for {}\n".format(higher_taxa))
+			if style:
+				FirstName = "'{}'".format(Con)
+			else:
+				FirstName = Con
+			SpNames = []
+			
+		if outstr:
+			head=delimit
+		else:
+			head=""
+		outstr += head + delimit.join([ FirstName ] + SpNames)
+
+	return NumCons,outstr
+	
+def findassociation(conceptstrings):
+	try:
+		HostString,AssocString = " ".join(conceptstrings).split("+")
+		HostList = HostString.split()
+		AssocList = AssocString.split()
+	except ValueError:
+		sys.exit("** To search for associations, provide two or more concepts separated by a plus\n")
+	
+	""" Make your own custom query """
+	# print "HostString",  (getconceptfromKB([HostString]))
+	# print "AssocString", AssocList
+	FromCon = """ ann.ConceptName IN ({}) """.format(getconceptfromKB(HostString)[1])
+	ToCon = """ ann.ToConcept IN ({}) """.format(getconceptfromKB(AssocString)[1])
+	# FromCon = """ ann.ConceptName IN ({}) """.format("'Chrysaora'")
+	# ToCon = """ ann.ToConcept IN ({}) """.format("'Doryteuthis opalescens'")
+	# Take all args as a string, split on commas (genus species have spaces)
+	
+	Aquery = """ SELECT
+	ann.DiveNumber,ann.DEPTH, ann.ConceptName,ann.Latitude, ann.Longitude, ann.Temperature, ann.Oxygen, ann.Salinity,
+	ann.RecordedDate, ann.RovName, ann.TapeTimeCode, ann.Zoom,
+	ann.ObservationID_FK AS obsid, ann.LinkValue, ann.linkName,
+	ann.VideoArchiveSetID_FK AS vasid, ann.ShipName,
+	ann.Associations,ann.videoArchiveName, ann.CameraDirection,
+	ann.ChiefScientist, ann.FieldWidth, ann.Image, ann.Light, ann.Notes,
+	ann.Observer, ann.ToConcept
+	FROM Annotations AS ann
+	WHERE (
+	( ({0}) AND ({1}) ) 
+	AND ann.linkName LIKE '%feeding-association%'   
+	) """.format(FromCon,ToCon)
+	# print "query: ",Aquery
+	Fields = """DiveNumber\tDEPTH\tConceptName\tLatitude\tLongitude\tTemperature\tOxygen\tSalinity\t\
+	RecordedDate\tEpochSecs\tRovName\tTapeTimeCode\tZoom\tObservationID_FK\tLinkValue\tlinkName\t\
+	VideoArchiveSetID_FK\tShipName\tObservationID_FK\tAssociations\tvideoArchiveName\tCameraDirection\t\
+	ChiefScientist\tFieldWidth\tImage\tNotes\tObserver\tAssocConcept\n"""
 
-#### Example queries, depending on database
+	sys.stderr.write("Finding all annotations for {0} \nWITH {1}...\n".format(FromCon,ToCon))
+	head = """DiveNumber	Depth	ConceptName	Latitude	Longitude	Temperature	Oxygen	EpochSecs	Salinity	RecordedDate	RovName	TapeTimeCode	Zoom	ObservationID_FK	LinkValue	linkName	VideoArchiveSetID_FK	ShipName	Associations	videoArchiveName	CameraDirection	ChiefScientist	FieldWidth	Image	Light	Notes	Observer	AssocConcept"""
+	# print Fields
+	NumFound,Outstr = execquery(Aquery)
+	sys.stderr.write("## Found %d annotations for %s...\n" % (NumFound,conceptstrings))
+	return Outstr
+	
 def getsamples(DiveList):
+	
 	# removed Image, but add back in if you want Image URL
 	# for epoch secs add this back in: DateDiff(ss, '01/01/70', RecordedDate) AS Esecs,
 	#DiveList = DiveListAsString.split(",")
 	SQuery = """
 	SELECT DISTINCT
 	      CONVERT(varchar(22), RecordedDate, 120) as DateTime24,
-	      RovName, DiveNumber, ConceptName,
-	      Associations,
+	      RovName, DiveNumber, ConceptName, Associations,
 	      Depth, Latitude, Longitude, TapeTimeCode, ISNULL(Observer, '') AS Observer,
 	      AnnotationMode, ObservationID_FK, LinkName, LinkValue
 	FROM  dbo.Annotations
 	WHERE (RovName like '{0}') AND (DiveNumber = {1})
-	AND (LinkName LIKE '%sample-reference%')
-	ORDER BY TapeTimeCode, DateTime24
-	"""
-	sys.stderr.write("Finding samples...\n")
+	AND (LinkName LIKE '%sample-reference%') {2}
+	ORDER BY TapeTimeCode, DateTime24 ; """
+	#sys.stderr.write("Finding samples...\n")
 	SHead = "DateTime24	RovName	DiveNumber	ConceptName	Associations	Depth	Latitude	Longitude	TapeTimeCode	Observer	AnnotationMode	ObservationID_FK	SampleRefName"
-	print SHead
+	if __name__== "__main__":
+		print SHead
 	NumFound = 0
+	TotalFound = 0
+	AllOut = ""
 	if "," in DiveList[0]:
 		DiveList = DiveList[0].split(",") 
 	for Dive in DiveList:
-		ROVName,DiveNum = parsedivenumbers(Dive)
-		NumFound += execquery(SQuery.format(ROVName, DiveNum), mydatabase="VARS")
-	sys.stderr.write("## Found %d samples for the query...\n" % (NumFound))
+		ROVName,DiveNum,Sample = parsedivenumbers(Dive)
+		if Sample:
+			SampleString = "AND (LinkValue LIKE '%{}') ".format(Sample.lower())
+		else:
+			SampleString = ""
+		NumFound, Outstr = execquery(SQuery.format(ROVName, DiveNum, SampleString), mydatabase="VARS")
+		TotalFound += NumFound
+		AllOut += Outstr
+	return AllOut
+	sys.stderr.write("## Found %d samples for the query...\n" % (TotalFound))
 
+	
 def getdivesummary(DiveList):
 	# removed Image, but add back in if you want Image URL
 	# for epoch secs add this back in: DateDiff(ss, '01/01/70', RecordedDate) AS Esecs,
 	if "," in DiveList[0]:
 		DiveList = DiveList[0].split(",") 
 	NumFound = 0
+	TotalFound = 0
 	for Dive in DiveList:
-		ROVName,DiveNum = parsedivenumbers(Dive, shortname = True)
+		ROVName,DiveNum,S = parsedivenumbers(Dive, shortname = True)
 		# print SQuery.format(ROVName,DiveNum)
-		NumFound += execquery(query = SQuery.format(ROVName, DiveNum), mydatabase="EXPD")
-	sys.stderr.write("## Found %s dive summaries...\n" % (NumFound))
-
+		NumFound,Outstr = execquery(query = SQuery.format(ROVName, DiveNum), mydatabase="EXPD")
+		TotalFound +=NumFound
+		print Outstr
+	sys.stderr.write("## Found %s dive summaries...\n" % (TotalFound))
 
 def findconcept(conceptstrings):
 	ConString = """ ann.ConceptName like '%%%s%%' """
 	sys.stderr.write("Finding all annotations for %s...\n" % conceptstrings)
 	head = """DiveNumber	Depth	ConceptName	Latitude	Longitude	Temperature	Oxygen	EpochSecs	Salinity	RecordedDate	RovName	TapeTimeCode	Zoom	ObservationID_FK	LinkValue	linkName	VideoArchiveSetID_FK	ShipName	Associations	videoArchiveName	CameraDirection	ChiefScientist	FieldWidth	Image	Light	Notes	Observer"""
 	print Fields
-	NumFound = execquery(query)
+	NumFound,Outstr = execquery(query)
+	print Outstr
 	sys.stderr.write("## Found %d annotations for %s...\n" % (NumFound,conceptstrings))
 
 def execquery(query, mydatabase="VARS"):
 
 	# grab the right server name, depending on what database is being used...
 	servername = serverlookup[mydatabase]
-	username = "GETFROMBRIAN" # contact brian schlining (brian@mbari.org)
-	pw = "GETFROMBRIAN"
+	username = "everyone"
+	pw = "guest"
 
 
 	# Try the ** operator with the parameters as a dictionary
 	#Write all records to one big file:
 	# to write to a different file per dive, uncomment the "write to individual files lines below..."
 
-
 	NumRecords=0
 
 	cur.execute(query)
+	outstr=""
 	for row in cur:
 		NumRecords += 1
 		#### THIS HAS TO BE FIXED??
 		# strlist.insert(8,str(epoch) )
 		#print "%s\t%s\t%s" % (row[7],row[8],row[9])
 		#print "\t".join(strlist)
-		outstr = "\t".join(strlist)
-		print outstr
+		outstr += "\t".join(strlist)
+		outstr += "\n"
 		#print epoch
 		#print "\t".join(row)
 		#print "ID=%d, Name=%s" % (row['id'], row['name'])
 
-	return NumRecords
 	conn.close()
+	return NumRecords,outstr
 
 ### START OF PROGRAM
-
-if len(sys.argv)<=1:
-	sys.stderr.write(__doc__.format(sys.argv[0]))
-else:
-	if sys.argv[1] == "-c":
-		findconcept(sys.argv[2:])
-		if len(sys.argv) > 3:
-			sys.stderr.write("** Concepts should be comma-separated or surrounded in quotes.\n Space-separated lists will not work as expected")
-	elif sys.argv[1] == "-s":
-		getsamples(sys.argv[2:])
-	elif sys.argv[1] == "-d":
-		getdivesummary(sys.argv[2:])
-	else:
+def main():
+	
+	if len(sys.argv)<=1:
 		sys.stderr.write(__doc__.format(sys.argv[0]))
-
+	else:
+		if sys.argv[1] == "-c":
+			findconcept(sys.argv[2:])
+			if len(sys.argv) > 3:
+				sys.stderr.write("** Concepts should be comma-separated or surrounded in quotes.\n Space-separated lists will not work as expected")
+		elif sys.argv[1] == "-s":
+			
+			print( getsamples(sys.argv[2:]).rstrip("\n") )
+		elif sys.argv[1] == "-d":
+			getdivesummary(sys.argv[2:])
+		elif sys.argv[1] == "-k":
+			if sys.argv[-1] == "1":
+				print( getconceptfromKB(" ".join(sys.argv[2:-1]), style=1)[1])
+			else:
+				print( getconceptfromKB(" ".join(sys.argv[2:]), style=0)[1])
+		elif sys.argv[1] == "-a":
+			print( findassociation(sys.argv[2:]) )
+		else:
+			sys.stderr.write(__doc__.format(sys.argv[0]))
+
+if __name__ == "__main__":
+	main()
 
 """
 DiveSummary in expd has the following fields:
 rovname
 shipid
 shipname
-"""
+"""