1. beroe
  2. MBARI public

Commits

beroe  committed d23a15f

updated vars retrieve concept

  • Participants
  • Parent commits 539a723
  • Branches master

Comments (0)

Files changed (3)

File database/README.md

View file
  • Ignore whitespace
 
 # *File descriptions:
 
-* **VARSReplacementDictionary.py**
 * **flickr_search2.py**
 	- search flickr for images with certain tags and in certain bounding boxes
 * **flickrapi2.py**
+    - required for `flickr_search2`
 * **vars_image_move.py** 
-	- takes images retrieved by VARS query and reorganizes them into folders
+	- takes images retrieved in bulk by VARS query and reorganizes them into folders
 * **vars_retrieve_concept.py**
  	- search MBARI database for dive summaries, samples, and annotations
-* **varscleanup.py**
- 	- for cleaning up concept lists downloaded from VARS
+* **crossref_parse.py**
+ 	- Given a flat list of references, search Crossref for the matching bibliographic records.
 
 
 # *Notes on installation

File database/vars_retrieve_concept.py

View file
  • Ignore whitespace
 To find samples from a dive, run with the appropriate flag as true,
 and send in a space- or comma-delimited list of dive numbers.
 NOTE: Comma-separated lists must not also contain spaces.
+NOTE: To get frame grabs, save output of -c search then do:
+    for X in $(cut -f11 Erenna-non-cornuta-FGs.txt); do curl -O $X; done;
+To get PNGs (or see -id option below): 
+    for J in $(sed -E 's/jpg/png/g' FGs.txt | cut -f11); do curl -O "$J"; done
 
 You can search in three ways:
- -d : get dive summary
- -s : get samples collected during dive
- -c : get all annotations of concepts for all dives
- -a : get all associations between two or more groups, separated by +
-       secret option: put 1 at the end for full table (nothing is summary)
- -k : get all species for a higher taxon (takes only one name)
-       secret option: put 1 at the end for quoted csv, 2 for full table
+ -d  : get dive summary
+ -s  : get samples collected during dive
+ -i  : get URL of frame grabs for concept
+ -id : Download all the images for a concept (only provide one concept)
+ -c  : get all annotations of concepts for all dives
+ -a  : get all associations between two or more groups, separated by +
+        secret option: put 1 at the end for full table (nothing is summary)
+ -k  : get all species for a higher taxon (takes only one name)
+        secret option: put 1 at the end for quoted csv, 2 for full table
  
   
 Usage: 
 	{0} -s D422 R500
 	{0} -d D422,v360 
 	{0} -c Aulacoctena,"Bathyctena chuni"
+	{0} -i Paraphronima
+	{0} -id Paraphronima
 	{0} -d v{{2770..2880}}    # get a dive summary for all dives between those two numbers
 	{0} -k Narcomed, Scyphozo # All species for both Narcos and Scyphos 
 	{0} -a Narcome, Scyphoz + Amphipod # Associations between Narcos and Amphipods
 #     brew install --with-unixodbc freetds
 #     sudo pip install pymssql
 
-import time
+# INSTALLATION FOR UBUNTU / LINUX:
+# sudo apt-get install unixodbc
+# sudo apt-get install freetds-dev
+# sudo pip install pymssql
+
 import sys
+import subprocess
+import os
 import pymssql
 
 
-
 def parsedivenumbers(Dive,shortname=False):
 	Dive = Dive.strip()
 	DiveDict = dict(zip(list("TVRD"),["Tiburon","Ventana","Doc Ricketts","Doc Ricketts"]))
 		Num = Dive[1:].strip()
 	return Veh,Num,Sample
 
+
 def getsampleinfo(SampleID):
 	"""Incomplete?"""
 	DQuery = "SELECT DISTINCT SampleRefName, DescriptionComment from MBARI_Samples.dbo.Sample where SampleID < 1000; "
 
 	execquery(DQuery, mydatabase="EXPD")
 
+
 def getconceptfromKB(higher_taxa,style=1):
 	""" Retrieve all subordinate taxonomic concepts from the knowledgebase
  Style 1 (default) is quoted and comma delimited. 
 	SearchNames = ""
 	outstr=""
 	NumCons = 0
-	delimit=["\n",", ",""][style] # zero gets newline
+	delimit=["\n",", ",""][style]  # zero gets newline
 	for Con in ConList:
 		Constraint = ConString.format(Con.strip())
 		# print "CONSTRAINT: ", Constraint
 	
 # Using knowledgebase (VARS_KB), select all the species for a group
 		Cquery = """
-		WITH org_name AS (
-	        SELECT DISTINCT
-	            parent.id AS parent_id, parentname.ConceptName as parent_name,
-	            child.id AS child_id, childname.ConceptName as child_name
-	        FROM
-	            Concept parent RIGHT OUTER JOIN 
-	            Concept child ON child.ParentConceptID_FK = parent.id LEFT OUTER JOIN
-	            ConceptName childname ON childname.ConceptID_FK = child.id LEFT OUTER JOIN
-	            ConceptName parentname ON parentname.ConceptID_FK = parent.id
-	        WHERE
-	            childname.NameType = 'Primary' AND
-	            parentname.NameType = 'Primary' ), 
-	jn AS (   SELECT            parent_id, parent_name, child_id, child_name FROM org_name 
-			WHERE ({}) 
-			UNION ALL SELECT C.parent_id, C.parent_name, C.child_id, C.child_name FROM jn AS p 
-			JOIN org_name AS C ON C.parent_id = p.child_id ) 
-	SELECT DISTINCT jn.parent_id, jn.parent_name, jn.child_id, jn.child_name 
-	FROM jn ORDER BY 1;
-	""".format(Constraint)
+WITH org_name AS (
+        SELECT DISTINCT
+            parent.id AS parent_id, parentname.ConceptName as parent_name,
+            child.id AS child_id, childname.ConceptName as child_name
+        FROM
+            Concept parent RIGHT OUTER JOIN 
+            Concept child ON child.ParentConceptID_FK = parent.id LEFT OUTER JOIN
+            ConceptName childname ON childname.ConceptID_FK = child.id LEFT OUTER JOIN
+            ConceptName parentname ON parentname.ConceptID_FK = parent.id
+        WHERE
+            childname.NameType = 'Primary' AND
+            parentname.NameType = 'Primary' ), 
+jn AS (   SELECT            parent_id, parent_name, child_id, child_name FROM org_name 
+       WHERE ({}) 
+       UNION ALL SELECT C.parent_id, C.parent_name, C.child_id, C.child_name FROM jn AS p 
+       JOIN org_name AS C ON C.parent_id = p.child_id ) 
+SELECT DISTINCT jn.parent_id, jn.parent_name, jn.child_id, jn.child_name 
+FROM jn ORDER BY 1;
+""".format(Constraint)
 
 		NumRecs, SpeciesList = execquery(Cquery, mydatabase="VARS_KB")
 		SearchNames += " and ".join(Con)
 		outstr += head + delimit.join([ FirstName ] + SpNames)
 
 	return NumCons,outstr
-	
+
+
 def findassociation(conceptstrings):
 	try:
 		HostString,AssocString = " ".join(conceptstrings).split("+")
-		HostList = HostString.split()
-		AssocList = AssocString.split()
+		# HostList = HostString.split()
+		# AssocList = AssocString.split()
 	except ValueError:
 		sys.exit("** To search for associations, provide two or more concepts separated by a plus\n")
 	
 	# sys.stderr.write("## Found %d associations for %s...\n" % (TotalNum,conceptstrings))
 
 	return TotalNum, NumFields + Outstr
+
 	
 def getsamples(DiveList):
 	
 	# removed Image, but add back in if you want Image URL
 	# for epoch secs add this back in: DateDiff(ss, '01/01/70', RecordedDate) AS Esecs,
-	#DiveList = DiveListAsString.split(",")
+	# DiveList = DiveListAsString.split(",")
 	SQuery = """
 	SELECT DISTINCT
 	      CONVERT(varchar(22), RecordedDate, 120) as DateTime24,
 	return AllOut
 	sys.stderr.write("## Found %d samples for the query...\n" % (TotalFound))
 
+
 	
 def getdivesummary(DiveList):
 	# removed Image, but add back in if you want Image URL
 	NumFound = 0
 	TotalFound = 0
 	for Dive in DiveList:
-		ROVName,DiveNum,S = parsedivenumbers(Dive, shortname = True)
+		ROVName,DiveNum,S = parsedivenumbers(Dive, shortname=True)
 		# print SQuery.format(ROVName,DiveNum)
-		NumFound,Outstr = execquery(query = SQuery.format(ROVName, DiveNum), mydatabase="EXPD")
+		NumFound,Outstr = execquery(query=SQuery.format(ROVName, DiveNum), mydatabase="EXPD")
 		TotalFound +=NumFound
 		print Outstr.rstrip("\n")
 	sys.stderr.write("## Found %s dive summaries...\n" % (TotalFound))
 
+
 def findconcept(conceptstrings):
 	ConString = """ ann.ConceptName like '%%%s%%' """
 	# Take all args as a string, split on commas (genus species have spaces)
 	print Outstr
 	sys.stderr.write("## Found %d annotations for %s...\n" % (NumFound,conceptstrings))
 
+def getimages(conceptstrings,getThem=False):
+	ConString = """ ann.ConceptName like '%%%s%%' """
+	# Take all args as a string, split on commas (genus species have spaces)
+	Splits = " ".join(conceptstrings).split(",")
+	ConList = [ConString % (Con.strip()) for Con in Splits]
+	Constraint = "( %s )" % (" OR ".join(ConList))
+	# Concept = " ".join(sys.argv[1:])
+	# print Con
+	query = """ SELECT
+	ann.Image
+	FROM
+	Annotations AS ann
+	where
+	%s """ % Constraint
+
+	Fields = """Image"""
+
+	sys.stderr.write("Finding all annotations for %s...\n" % conceptstrings)
+	NumFound,Outstr = execquery(query)
+	for S in Outstr.split('\n'):
+		if len(S) > 6:
+			if getThem:
+				Fname = os.path.split(S)[-1]
+				URLtext = "curl -s '{0}' > '{1}_{2}'".format(S,Con,Fname)
+				print URLtext
+				ProcOut = subprocess.Popen(URLtext, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,shell=True)
+			else:
+				print S
+	sys.stderr.write("## Found %d annotations for %s...\n" % (NumFound,conceptstrings))
+
 def execquery(query, mydatabase="VARS"):
 	serverlookup = {
 		"EXPD"	 : "solstice.shore.mbari.org",
 
 	# grab the right server name, depending on what database is being used...
 	servername = serverlookup[mydatabase]
-	username = "everyone"
-	pw = "guest"
-
-
-	# Try the ** operator with the parameters as a dictionary
-	"""config = {
-  'user': 'scott',
-  'password': 'tiger',
-  'host': '127.0.0.1',
-  'database': 'employees',
-  'raise_on_warnings': True,
-}
-cnx = mysql.connector.connect(**config)
-cnx.close()
-"""
+	username = "GETFROMBRIAN"
+	pw = "GETFROMBRIAN"
 
 	conn = pymssql.connect(host=servername, user=username, password=pw, database=mydatabase, as_dict=False)
 	cur = conn.cursor()
 		# #epoch = int(time.mktime(time.strptime(date_time, timepattern)))
 		strlist = ["%s"%(x) for x in row]
 		# strlist.insert(8,str(epoch) )
-		#print "%s\t%s\t%s" % (row[7],row[8],row[9])
-		#print "\t".join(strlist)
+		# print "%s\t%s\t%s" % (row[7],row[8],row[9])
+		# print "\t".join(strlist)
 		outstr += "\t".join(strlist)
 		outstr += "\n"
-		#print epoch
-		#print "\t".join(row)
-		#print "ID=%d, Name=%s" % (row['id'], row['name'])
+		# print epoch
+		# print "\t".join(row)
+		# print "ID=%d, Name=%s" % (row['id'], row['name'])
 
 	conn.close()
 	return NumRecords,outstr
 
+
 def summarizeassoc(instring):
 	""" Split the associations to a subset of pairs"""
 
 	OK = sorted(OutSet.keys())
 	for k in OK:
 		print "%8d  %s" % (OutSet[k],k)
+
 	
 ### START OF PROGRAM
 def main():
 			findconcept(sys.argv[2:])
 			if len(sys.argv) > 3:
 				sys.stderr.write("** Concepts should be comma-separated or surrounded in quotes.\n Space-separated lists will not work as expected")
-		elif sys.argv[1] == "-s":
-			
+		elif sys.argv[1] == "-i":
+			getimages(sys.argv[2:])
+			if len(sys.argv) > 3:
+				sys.stderr.write("** Concepts should be comma-separated or surrounded in quotes.\n Space-separated lists will not work as expected")
+		elif sys.argv[1] == "-id":
+			getimages(sys.argv[2],getThem=True)
+			if len(sys.argv) > 3:
+				sys.stderr.write("** For image retrieval, only give one concept, surrounded in quotes if needed.")
+		elif sys.argv[1] == "-s":			
 			print( getsamples(sys.argv[2:]).rstrip("\n") )
 		elif sys.argv[1] == "-d":
 			getdivesummary(sys.argv[2:])
 if __name__ == "__main__":
 	main()
 
+
 """
 DiveSummary in expd has the following fields:
 COLUMN_NAME

File fileutils/vars_image_move.py

  • Ignore whitespace
-#! /usr/bin/env python
-
-"""
-Version 2.0 - June 2013
-finds all <Extension> files in three vehicle-named subdirectories 
-of the current directory, and renames them by adding the
-Taxon_Vehicle_directory name to the front, moving them to the 
-<Destination> folder. This folder should be at the same level
-as the vehicle-named folders.
-
-TODO: take Destination and Taxon name as sys.argv
-
-Usage: 
-
-* CD to the folder that has the vehicle folder, 
-* Create a folder at that same level to serve as the destination
-* Manually edit the taxon tag and destination folder name (lines 25 and 27).
-
-Run with "vars_image_move.py"
-
-"""
-
-import os, sys, glob
-
-
-# Name of folder in current directory at same level as vehicles
-Destination = 'Periphyllopsis'
-# Vehicle = "Tib"
-Taxon = "Periphyllopsis"
-DEBUG = False
-
-Extension="jpg"
-
-BaseDir = os.getcwd()
-VehList = ["Doc Ricketts", "Tiburon", "Ventana"]
-
-for Veh in VehList:
-	NewPath = os.path.join(BaseDir,Veh)
-	if not os.path.exists(NewPath):
-		sys.stderr.write("####\n### No folder for vehicle {}\n####\n".format(Veh))
-	else:
-		os.chdir(NewPath)
-		# sys.stderr.write("IN {} DIRECTORY...\n".format(os.getcwd()))
-		Vehicle = Veh[0:3]
-		MyCwd = os.getcwd().split("/")[-1]
-		# e.g. Ventana
-		if DEBUG:
-			print MyCwd
-
-
-		ImageDirList = os.popen('ls -F  | grep \/','r').read().split()
-
-		# e.g. images
-		if DEBUG:
-			print "ImageDir:",ImageDirList
-
-
-		for SubDir in ImageDirList:
-			#e.g. 3602
-			if SubDir.startswith("stills"):
-				# sys.stderr.write("IN STILLS DIRECTORY...\n")
-				DirList = [o.replace("stills/","") for o in glob.glob('stills/*/*') if os.path.isdir(o)]
-			else:
-				DirList = os.popen('ls -F  %s | grep \/' % SubDir,'r').read().split()
-			if DEBUG:
-				print "** In SubDir: DirList:",DirList
-			
-			for Direct in DirList:
-				# 	InDirList = os.popen('ls -F %s/%s/*/*| grep \/' % (SubDir, Direct),'r').read().split()
-				# else: 
-				# 	InDirList = os.popen('ls -F  %s/%s/* | grep \/' % (SubDir, Direct),'r').read().split()
-				# if DEBUG:
-				# 	print "InDirList:",InDirList
-				if SubDir.startswith("stills"):
-					Direct = Direct.replace(":","/").replace("//","/")
-					# print >> sys.stderr, "SUBDIR: {}".format(SubDir)
-					# print >> sys.stderr, "DIRECT",Direct
-					
-				print "Directory", Direct,'-----------------'
-				ListCommand = 'ls ' + SubDir + Direct +'/*.'+Extension
-				if DEBUG:
-					print >> sys.stderr, "LIST COMMAND: ", ListCommand
-				FileList=os.popen(ListCommand).read().split()
-				if len(FileList)==0:
-					sys.stderr.write( "No files found in " + ListCommand + "\n")
-				else:
-					if DEBUG:
-						print "yes", FileList
-					for PathName in FileList:
-						PathName = PathName.replace("//","/")
-						if SubDir.startswith("stills"):
-							FileName= PathName.split('/')[-1]
-							Directs = "_".join(PathName.split('/')[1:3] )
-							EndName = Directs + '-' + FileName
-							if DEBUG:
-								print >> sys.stderr, "ListCommand: ", ListCommand
-							# EndName = PathName.replace("/","_").replace(":","_")	
-						else:
-							if DEBUG:
-								print "Pathname", PathName
-							FileName=PathName.split('/')[-1]
-							if (FileName) and FileName[0]==('_'):
-								FileName=FileName[1:]
-							#print FileName
-							EndName = Direct[:-1] + '-' + FileName
-						NewName= '../' + Destination + '/' + Taxon+'_' + Vehicle + '_' + EndName
-						command = 'cp ' + PathName + " " + NewName
-						print command
-						if not DEBUG:
-							os.popen(command,'r')
-					if not DEBUG:
-						os.popen('chmod 644 ../' + Destination + '/*.'+ Extension ,'r')
-os.chdir(BaseDir)		
-