Commits

Ivo Hinkelmann  committed 4fc2c73

#i116350# Change sdf file format

  • Participants
  • Parent commits e71dac2

Comments (0)

Files changed (11)

File l10ntools/scripts/tool/fast_merge3.py

+from optparse import OptionParser
+import sdffile, sdf2
+import sys, os , cStringIO
+
+
+def make_path(merge_dir, line):
+    """Return the output directory for the source file of an entity.
+
+    merge_dir, line.project and line.source_file are joined with "/",
+    backslashes are turned into forward slashes and the last path
+    component (the file name) is dropped.
+    """
+    full_path = "/".join([merge_dir, line.project, line.source_file])
+    components = full_path.replace("\\", "/").split("/")
+    return "/".join(components[:-1])
+
+def mkdir(dir_str):
+    """Create dir_str including missing parent directories, best effort.
+
+    Failures reported by the operating system (most commonly: the
+    directory already exists) are ignored on purpose; a directory that
+    really could not be created surfaces later when a file in it is opened.
+    """
+    try:
+        os.makedirs(dir_str)
+    except OSError:
+        # Only swallow OS-level errors; a bare except would also hide
+        # KeyboardInterrupt and programming errors.
+        pass
+
+def process_ident(ident_file, merge_dir):
+    """Prepare the output directories for all ident entities.
+
+    For every entity in ident_file the target directory below merge_dir is
+    created, and an existing "ident.sdf2" in that directory is removed the
+    first time the directory shows up in a run of consecutive entities.
+
+    Writing a fresh ident.sdf2 was disabled (commented out) in the original
+    code, so this function only creates directories and removes stale files.
+    """
+    previous = ""
+    for line in ident_file:
+        dir_str = make_path(merge_dir, line)
+        mkdir(dir_str)
+        ident_path = dir_str + "/ident.sdf2"
+        if ident_path == previous:
+            continue
+        previous = ident_path
+        try:
+            os.unlink(ident_path)
+        except OSError:
+            # Nothing to remove (or not removable): keep going.
+            pass
+
+def process_sdf2_data(sdf2_list, merge_dir, fh_dict, ident_dict):
+    """Convert the entries of all given sdf2 data files to sdf lines.
+
+    Each data entry is linked to its ident entity through the key id,
+    converted with sdf2.convert_sdf2dataentity_to_sdf and appended to the
+    in-memory buffer (from fh_dict) of the matching output file.
+    """
+    for sdf2_file in sdf2_list:
+        entries = read_sdf2_data_file(sdf2_file)
+        for keyid in entries:
+            # link ident + data
+            ident = ident_dict[keyid]
+            out = get_fh(ident, merge_dir, fh_dict)
+            sdf_line = sdf2.convert_sdf2dataentity_to_sdf(ident, entries[keyid])
+            out.write(str(sdf_line) + "\n")
+        
+# Paths that write() has already written during this run.
+_written_files = set()
+
+def write(fh_dict):
+    """Flush every buffer in fh_dict to its file and reset the buffer.
+
+    fh_dict maps output file names to cStringIO buffers.  The first time a
+    file is written in this run it is truncated; later calls append, so
+    that data collected chunk by chunk ends up in the same file.  The
+    original mode string "wa" is not a valid mode and acted like "w" on
+    common platforms, which made every later chunk wipe the earlier output.
+
+    Exits with status -1 when a file cannot be opened.
+    """
+    # Iterate over a snapshot because the dict is updated inside the loop.
+    for path, buf in list(fh_dict.items()):
+        if path in _written_files:
+            mode = "a"
+        else:
+            mode = "w"
+        try:
+            f = open(path, mode)
+        except IOError:
+            print("Can not write to file " + path)
+            sys.exit(-1)
+        print("Writing file " + path)
+        try:
+            f.write(buf.getvalue())
+        finally:
+            f.close()
+        _written_files.add(path)
+        buf.close()
+        fh_dict[path] = cStringIO.StringIO()
+
+def get_fh(line, merge_dir, fh_dict):
+    """Return the in-memory buffer for the localize.sdf of an entity.
+
+    The buffer is looked up in fh_dict by output path; a new cStringIO
+    buffer is created and registered on first use.
+    """
+    target = make_path(merge_dir, line) + "/localize.sdf"
+    if target not in fh_dict:
+        fh_dict[target] = cStringIO.StringIO()
+    return fh_dict[target]
+
+def read_file(file):
+    """Return the lines of a text file without their newline characters.
+
+    Prints an error and exits with status -1 if the file cannot be read.
+    """
+    try:
+        f = open(file, "r")
+        raw_lines = f.readlines()
+    except IOError:
+        print("ERROR: Can not read file " + file)
+        sys.exit(-1)
+    else:
+        f.close()
+    return [entry.strip('\n') for entry in raw_lines]
+
+def read_sdf2_data_file(sdf2_file):
+    """Parse an sdf2 data file into a dict of key id -> Sdf2DataEntity.
+
+    If a key id occurs more than once, the last line wins.
+    """
+    print("Reading file " + sdf2_file)
+    entries = {}
+    for raw in read_file(sdf2_file):
+        entity = sdf2.Sdf2DataEntity()
+        entity.set_properties(raw)
+        entries[entity.keyid] = entity
+    return entries
+
+def read_ident_file(sdf2_ident_file):
+    """Parse the ident file.
+
+    Returns a tuple (entities, by_keyid): the Sdf2IdentEntity objects in
+    file order, and a dict mapping each key id to its entity.
+    """
+    print("Reading ident file ...")
+    entities = []
+    by_keyid = {}
+    for raw in read_file(sdf2_ident_file):
+        entity = sdf2.Sdf2IdentEntity()
+        entity.set_properties(raw)
+        entities.append(entity)
+        by_keyid[entity.keyid] = entity
+    return entities, by_keyid
+
+def read_sdf_file_list(sdf2_files):
+    """Return the file names listed in sdf2_files.
+
+    Every line of the list file is split at single spaces; all resulting
+    tokens are returned in order.
+    """
+    names = []
+    for entry in read_file(sdf2_files):
+        names.extend(entry.split(" "))
+    return names
+
+def parse_options():
+    """Parse the command line.
+
+    Returns the tuple (sdf2files, mergedir, sdf2identfile).  All three
+    options are mandatory; if one is missing or empty the usage text is
+    written to stderr and the script exits with status -1.
+    """
+    parser = OptionParser()
+    parser.add_option("-s", "--sdf2files",       dest="sdf2files",       metavar="FILE", help="")
+    parser.add_option("-i", "--sdf2identfile",   dest="sdf2identfile",   metavar="FILE", help="")
+    parser.add_option("-d", "--mergedir",        dest="mergedir",         metavar="DIR",  help="")
+
+    (options, args) = parser.parse_args()
+
+    required = (options.sdf2files, options.mergedir, options.sdf2identfile)
+    for value in required:
+        if value is None or len(value) == 0:
+            # usage() only builds the text; it has to be emitted here.
+            sys.stderr.write(usage())
+            sys.exit(-1)
+    return required
+    
+def usage():
+    """Return the command line synopsis, terminated by a newline."""
+    synopsis = ("Usage: fast_merge"
+                " --sdf2files <file containing sdf file names>"
+                " --sdf2identfile <identfile>"
+                " --mergedir <directory>")
+    return synopsis + "\n"
+
+
+# Number of sdf2 data files processed between two flushes to disk.
+MAX_OPEN_FILES = 35 
+sdf2_files, merge_dir, sdf2_ident_file = parse_options()
+sdf2_file_list = read_sdf_file_list(sdf2_files)
+ident_file, ident_dict = read_ident_file(sdf2_ident_file)
+
+# Split the file list into consecutive chunks of at most MAX_OPEN_FILES
+# entries.  Slicing never produces an empty chunk; the former rounding
+# arithmetic added an empty trailing chunk whenever the number of files
+# was a multiple of MAX_OPEN_FILES, and the write() following that empty
+# chunk rewrote every output file with an empty buffer.
+sdf2_chunks = [sdf2_file_list[start:start + MAX_OPEN_FILES]
+               for start in range(0, len(sdf2_file_list), MAX_OPEN_FILES)]
+fh_dict = dict()
+
+process_ident(ident_file, merge_dir)
+for part in sdf2_chunks:
+    process_sdf2_data(part, merge_dir, fh_dict, ident_dict)
+    write(fh_dict)    
+sys.exit(0)
+
+

File l10ntools/scripts/tool/init_sdf2.py

-import sdf2, sdf, sys
+import sdf2, sdf_raw, sys
 from optparse import OptionParser
 
 # convert the data only
 
 (options, args) = parser.parse_args()
 
-if options.input and options.ref:
+if options.ref:
     try:
         fh = open(options.input, "r")
         sdf_data_lines = [line.strip('\n') for line in fh.readlines()]
     
     mylist = list()
     for line in sdf_data_lines:
-        se = sdf.SdfEntity()
+        se = sdf_raw.SdfEntity()
         se.set_properties(line)
         mylist.append(se)
     sdf_data_lines = mylist
     
     mylist1 = list()
     for line in sdf_source_lines:
-        se = sdf.SdfEntity()
+        se = sdf_raw.SdfEntity()
         se.set_properties(line)
         mylist1.append(se)
     sdf_source_lines = mylist1

File l10ntools/scripts/tool/l10n.py

     l10nstatus.changed(sys.argv)
 elif(cmd == "translated"):
     l10nstatus.translated(sys.argv)
+elif(cmd == "export"):
+    l10nstatus.export(sys.argv)
+elif(cmd == "import"):
+    l10nimport.run(sys.argv)
 elif(cmd == "help"):
     help()
 

File l10ntools/scripts/tool/l10nimport.py

+import os
+import sdf, sdf2, sys, tempfile
+
+def run(opts):
+    """Entry point for "l10n import": run the import for the given argv."""
+    x = l10nimport(opts)
+    x.do_import()
+
+class l10nimport():
+    """Merge translated strings from an sdf file into the localize.sdf2 files.
+
+    The original version of this class could not be compiled (missing
+    colons and self parameters, mixed tabs and spaces); this version keeps
+    the intended flow and repairs those defects.
+    """
+
+    def __init__(self, opts):
+        # opts is the command line handed over by l10n.py (sys.argv).
+        self.opts = opts
+        self.filename = ""
+
+    def do_import(self):
+        """Read the sdf file named on the command line and merge it.
+
+        Entries are grouped by language; en-US entries are skipped.  For
+        every language the existing localize.sdf2 is loaded, entries with
+        the same key id are replaced and the file is written back.
+        """
+        # l10n.py passes the complete sys.argv, i.e. [script, "import", file],
+        # the same layout l10nstatus relies on for its optional argument.
+        if len(self.opts) == 3:
+            self.filename = self.opts[2]
+        else:
+            print("Usage: l10n import <file>")
+            sys.exit(13)
+
+        lines = self.read_file(self.filename)
+        # NOTE(review): the raw lines and an empty source list are passed
+        # exactly as in the original call; confirm against the contract of
+        # sdf2.convert_sdf_to_sdf2Data.
+        data = sdf2.convert_sdf_to_sdf2Data("", lines)
+
+        # Group by language first.  Keying by key id alone (as before) let
+        # translations of the same string in different languages overwrite
+        # each other.
+        # NOTE(review): attribute name langid kept from the original; other
+        # code reads the language of sdf2 data entities via .lang - verify.
+        by_lang = dict()
+        for entry in data:
+            if entry.langid == "en-US":
+                continue
+            by_lang.setdefault(entry.langid, dict())[entry.keyid] = entry
+
+        # Each language is read, updated and written exactly once, so the
+        # last language is no longer left unwritten.
+        for lang in sorted(by_lang):
+            localize_dict = self.read_sdf2_data(lang)
+            localize_dict.update(by_lang[lang])
+            self.write_sdf2_data(lang, localize_dict)
+
+    def _localize_path(self, lang):
+        # Location of the localize.sdf2 of one language.
+        return os.environ["SOURCE_ROOT"]+"/"+os.environ["WORK_STAMP"]+"/ooo/l10n/source/"+lang+"/localize.sdf2"
+
+    def write_sdf2_data(self, lang, localize_dict):
+        """Write all entries of localize_dict, sorted by their text form."""
+        path = self._localize_path(lang)
+        lines = sorted([str(entry) for entry in localize_dict.values()])
+        #todo: write to temp and shutil.move
+        try:
+            fh = open(path, "w")
+            try:
+                for line in lines:
+                    fh.write(line+"\n")
+            finally:
+                fh.close()
+        except IOError:
+            print("Can not write to file "+ path)
+
+    def read_sdf2_data(self, lang):
+        """Return the entries of a language's localize.sdf2 keyed by key id."""
+        # NOTE(review): parsed with sdf2.Sdf2DataEntity, the same way
+        # fast_merge3.py reads sdf2 data files; the original referenced the
+        # undefined names SdfData/SdfEntity here - confirm the entity type.
+        localize_dict = dict()
+        for line in self.read_file(self._localize_path(lang)):
+            de = sdf2.Sdf2DataEntity()
+            de.set_properties(line)
+            localize_dict[de.keyid] = de
+        return localize_dict
+
+    def read_file(self, filename):
+        """Return the lines of filename without newlines; exit 13 on error."""
+        try:
+            f = open(filename, "r")
+            try:
+                lines = [line.strip("\n") for line in f.readlines()]
+            finally:
+                f.close()
+        except IOError:
+            print("Can not read file "+filename)
+            sys.exit(13)
+        return lines

File l10ntools/scripts/tool/l10nstatus.py

 
 def run(opts):
     s = l10nstatus(opts)
-    s.exe()
+    s.exe("status")
 
 def translated(opts):
     s = l10nstatus(opts)
     s.exe("new")
 
 def changed(opts):
-    s = l10nstatus()
+    s = l10nstatus(opts)
     s.exe("changed")
 
+def export(opts):
+    """Run the l10nstatus tool for the given argv in "export" mode."""
+    s = l10nstatus(opts)
+    s.exe("export")
+
 class l10nstatus():
     NEW = "new"
     CHANGED = "changed"
 
     def exe(self, cmd=""):
         use_lang = ""
-        default_ident_location = os.environ["SOURCE_ROOT"]+"/"+os.environ["WORK_STAMP"]+"/ooo/l10n2/source/ident.sdf2"
+        default_ident_location = os.environ["SOURCE_ROOT"]+"/"+os.environ["WORK_STAMP"]+"/ooo/l10n/source/ident.sdf2"
         if len(self.opts) == 2:
-            default_location = os.environ["SOURCE_ROOT"]+"/"+os.environ["WORK_STAMP"]+"/ooo/l10n2/source/*/localize.sdf2" 
+            default_location = os.environ["SOURCE_ROOT"]+"/"+os.environ["WORK_STAMP"]+"/ooo/l10n/source/*/localize.sdf2" 
         elif len(self.opts) == 3:
             use_lang = self.opts[2]
             lang_list = use_lang.split(",")    
         else:
             sdf_data_list = list()
             for lang in lang_list:
-                sdf_data_list.append(os.environ["SOURCE_ROOT"]+"/"+os.environ["WORK_STAMP"]+"/ooo/l10n2/source/"+lang+"/localize.sdf2")
+                sdf_data_list.append(os.environ["SOURCE_ROOT"]+"/"+os.environ["WORK_STAMP"]+"/ooo/l10n/source/"+lang+"/localize.sdf2")
                         
         for file in sdf_data_list:
             mylines = list()
             del status.changed[lang]
     
     def print_status(self, status, cmd, lang):
-        if cmd == "translated":
+        if cmd == "translated" or cmd == "export":
+            tmplist = []
             for x in status.translated[lang].values():
-                print str(x)
-        elif cmd == "new":
-            for x in status.new[lang].values():
-                print str(x)
-        elif cmd == "changed":
+                tmplist.append(self.get_l10n_data(x))
+            for x in sorted(tmplist, key=lambda a: str(a)):
+                print x
+
+        if cmd == "new" or cmd == "export":
+            for x in sorted(status.new[lang].values(), key=lambda a: str(a)):
+                print str(sdf2.convert_sdf2idententity_to_sdf(self.ident[x.keyid]))
+        
+        if cmd == "changed" or cmd == "export":
+            tmplist = []
             for x in status.changed[lang].values():
-                print str(x)
-        else:
-            print "%(lang)7s" % { "lang": lang} +":"+"%(num)7d / %(per)3g" % {"num": len(status.new[lang].values()), "per": len(status.new[lang].values()) *100 / len(self.lines)} + "% new "  \
-                  +"%(num1)7d / %(per1)3g" % {"num1": len(status.translated[lang].values()), "per1": len(status.translated[lang].values()) *100 / len(self.lines)} + "% translated " \
-                  +"%(num2)7d / %(per2)3g" % {"num2": len(status.changed[lang].values()), "per2": len(status.changed[lang].values()) *100 / len(self.lines) } +"% changed "
- 
+                tmplist.append(self.get_l10n_data(x))
+            for x in sorted(tmplist, key=lambda a: str(a)):
+                print x
+
+        if cmd == "status":
+            print "%(lang)7s" % { "lang": lang} +":"+" |%(num)7d/%(per)3g" % {"num": len(status.new[lang].values()), "per": len(status.new[lang].values()) *100 / len(self.lines)} + "% new "  \
+                  +"|%(num1)7d/%(per1)3g" % {"num1": len(status.translated[lang].values()), "per1": len(status.translated[lang].values()) *100 / len(self.lines)} + "% translated " \
+                  +"|%(num2)7d/%(per2)3g" % {"num2": len(status.changed[lang].values()), "per2": len(status.changed[lang].values()) *100 / len(self.lines) } +"% changed |"
+     
+    def get_l10n_data(self, data):
+        """Return data converted to an sdf entity, using the ident entry
+        stored in self.ident under the key id of data."""
+        return sdf2.convert_sdf2dataentity_to_sdf(self.ident[data.keyid], data)
+
+
     def read_ident_data(self, default_ident_location):
         try:
             fh = open(default_ident_location, "r")

File l10ntools/scripts/tool/l10nupdate.py

-import tempfile, sys, subprocess, sdf, os, sdffile
+import tempfile, sys, subprocess, sdf_raw, sdf2, os, sdffile, heapq
 
 # run localize
 # add all new strings to sourcedb
 
 class l10nupdate:
     # fix me ;)
-    default_ident_location = os.environ["SOURCE_ROOT"]+"/"+os.environ["WORK_STAMP"]+"/ooo/l10n2/source/ident.sdf2"
+    default_ident_location = os.environ["SOURCE_ROOT"]+"/"+os.environ["WORK_STAMP"]+"/ooo/l10n/source/ident.sdf2"
     
     def __init__(self):
         pass
 #            print stdout_value
 #            print stderr_value
         
+        lines = []
         try:
             f = open(tmpfile, "r")
             lines = [line.strip('\n') for line in f.readlines()]
         
         sdffile = list()
         for line in lines:
-            se = sdf.SdfEntity()
+            se = sdf_raw.SdfEntity()
             se.set_properties(line)
             sdffile.append(se)
         
         sdf_file = self.run_localize()
         ident = sdffile.IdentFile()
         ident.read(self.default_ident_location)
-        old_cnt, new_cnt, changed_cnt, moved_cnt = 0, 0, 0, 0
+        unchanged_cnt, new_cnt, changed_cnt, moved_cnt, obsolete_cnt = 0, 0, 0, 0, 0
+        new_dict, obsolete_dict = dict(), dict()
         for line in sdf_file:
-            status = ident.add_sdf(line)
-            if status == "OLD":
-                old_cnt += 1
+            status, c_ident = ident.add_sdf(line)
+            if status == "UNCHANGED":
+                unchanged_cnt += 1
             elif status == "NEW":
                 new_cnt += 1
+                new_dict[c_ident.source_hash] = c_ident
             elif status == "CHANGED":
                 changed_cnt += 1
-            elif status == "MOVED":
+        for id,line in ident.ident.iteritems():
+            if line.get_l10n_status() == "":
+                obsolete_dict[str(line)] = line
+                obsolete_cnt += 1 
+        for key,line in new_dict.iteritems():
+            candidate = self.detect_moved(obsolete_dict, line)
+            print "Candidate: "+str(candidate)
+            print "for line : "+str(line)
+            if not candidate == None:
+                candidate.set_l10n_status("MOVED")
                 moved_cnt += 1
+                obsolete_cnt -= 1
+                line.keyid = candidate.keyid
+            ident.ident[candidate.get_identifier()] = line
+            del ident.ident[line.get_identifier()]
+ 
         ident.write(self.default_ident_location)
-        print "Update:\nnew strings: "+str(new_cnt)+"\nmoved strings: "+str(moved_cnt)+"\nchanged strings: "+str(changed_cnt)+"\nuntouched strings: "+str(old_cnt)+"\ntotal strings: "+str(new_cnt+changed_cnt+old_cnt)
+        print "Update:\nnew strings: "+str(new_cnt)+"\nmoved strings: "+str(moved_cnt)+"\nchanged strings: "+str(changed_cnt)+"\nuntouched strings: "+str(unchanged_cnt)+"\nobsolete strings: "+str(obsolete_cnt)+"\ntotal strings: "+str(new_cnt+changed_cnt+unchanged_cnt)
+    
+    def detect_moved(self, ident, line):
+        """Find the entry in ident that line was most likely moved from.
+
+        ident is a dict of candidate entries.  Only entries with the same
+        source_hash as line are considered; among those, the one sharing
+        the most of project, source_file, gid, lid and sid with line wins.
+        Returns that entry if at least two of these attributes are equal,
+        otherwise None.
+        """
+        # filename, gid,lid,sid,hash
+        compared = ("project", "source_file", "gid", "lid", "sid")
+        best_matches = -1
+        candidate = None
+
+        # Tracking the best entry directly avoids the heap of
+        # (score, entity) tuples, which compared entity objects on ties.
+        for ident_line in ident.values():
+            if ident_line.source_hash != line.source_hash:
+                continue
+            matches = 0
+            for attr in compared:
+                if getattr(ident_line, attr) == getattr(line, attr):
+                    matches += 1
+            if matches > best_matches:
+                best_matches = matches
+                candidate = ident_line
+
+        # At least two equal attributes found!
+        if best_matches >= 2:
+            return candidate
+        return None
 
+

File l10ntools/scripts/tool/l10nverify.py

         self.lines = list()
         
     def read(self):
-        default_ident_location = os.environ["SOURCE_ROOT"]+"/"+os.environ["WORK_STAMP"]+"/ooo/l10n2/source/ident.sdf2"
+        default_ident_location = os.environ["SOURCE_ROOT"]+"/"+os.environ["WORK_STAMP"]+"/ooo/l10n/source/ident.sdf2"
         
         try:
             f = open(default_ident_location, "r")

File l10ntools/scripts/tool/sdf.py

     
     def __init__ (self, filename=""):
         self._filename = filename
-    
+        self._dict = PseudoOrderedDict()
+        self._languages_found = []
+
     def __getitem__(self, key):
         if self._dict.has_key(key):
             return self._dict[key]
 import sys
 class SdfEntity: 
     # Sdf format columns
-    project         = ""
-    source_file     = ""
-    dummy1          = ""
-    resource_type   = ""
-    gid             = ""
-    lid             = ""
-    helpid          = ""
-    platform        = ""
-    dummy2          = ""
-    langid          = ""
-    text            = ""
-    helptext        = ""
-    quickhelptext   = ""
-    title           = ""
-    date            = ""
-    
-    import const
-    const._PROJECT_POS         = 0
-    const._SOURCE_FILE_POS     = 1
-    const._DUMMY1_POS          = 2
-    const._RESOURCE_TYPE_POS   = 3
-    const._GID_POS             = 4
-    const._LID_POS             = 5
-    const._HELPID_POS          = 6
-    const._PLATFORM_POS        = 7
-    const._DUMMY2_POS          = 8
-    const._LANGID_POS          = 9
-    const._TEXT_POS            = 10
-    const._HELPTEXT_POS        = 11
-    const._QUICKHELPTEXT_POS   = 12
-    const._TITLE_POS           = 13
-    const._DATE_POS            = 14
+   
+    #import const
+    _PROJECT_POS         = 0
+    _SOURCE_FILE_POS     = 1
+    _DUMMY1_POS          = 2
+    _RESOURCE_TYPE_POS   = 3
+    _GID_POS             = 4
+    _LID_POS             = 5
+    _HELPID_POS          = 6
+    _PLATFORM_POS        = 7
+    _DUMMY2_POS          = 8
+    _LANGID_POS          = 9
+    _TEXT_POS            = 10
+    _HELPTEXT_POS        = 11
+    _QUICKHELPTEXT_POS   = 12
+    _TITLE_POS           = 13
+    _DATE_POS            = 14
         
     def __init__(self, project="", source_file="", dummy1="0", resource_type="", gid="", lid="", helpid="", platform="", dummy2="0", langid="", 
                        text="", helptext="", quickhelptext="", title="", date=""):
-        self.project        = project;
-        self.source_file    = source_file;
-        self.dummy1         = dummy1;
-        self.resource_type  = resource_type;
-        self.gid            = gid;
-        self.lid            = lid;
-        self.helpid         = helpid;
-        self.platform       = platform;
-        self.dummy2         = dummy2;
-        self.langid         = langid;
-        self.text           = text;
-        self.helptext       = helptext;
-        self.quickhelptext  = quickhelptext;
-        self.title          = title;
+        self.project        = project
+        self.source_file    = source_file
+        self.dummy1         = dummy1
+        self.resource_type  = resource_type
+        self.gid            = gid
+        self.lid            = lid
+        self.helpid         = helpid
+        self.platform       = platform
+        self.dummy2         = dummy2
+        self.langid         = langid
+        self.text           = text
+        self.helptext       = helptext
+        self.quickhelptext  = quickhelptext
+        self.title          = title
+        self.keyid          = ""
+
         if date != "":
             self.date = date;
         else:
     def set_properties(self, line):
         splitted = line.split("\t")
         if len(splitted) == 15:
-            self.project        = splitted[ self.const._PROJECT_POS ]             
-            self.source_file    = splitted[ self.const._SOURCE_FILE_POS ]     
-            self.dummy1         = splitted[ self.const._DUMMY1_POS ]         
-            self.resource_type  = splitted[ self.const._RESOURCE_TYPE_POS ] 
-            self.gid            = splitted[ self.const._GID_POS ]             
-            self.lid            = splitted[ self.const._LID_POS ]             
-            self.helpid         = splitted[ self.const._HELPID_POS ]         
-            self.platform       = splitted[ self.const._PLATFORM_POS ]         
-            self.dummy2         = splitted[ self.const._DUMMY2_POS ]         
-            self.langid         = splitted[ self.const._LANGID_POS ]         
-            self.text           = splitted[ self.const._TEXT_POS ]             
-            self.helptext       = splitted[ self.const._HELPTEXT_POS ]         
-            self.quickhelptext  = splitted[ self.const._QUICKHELPTEXT_POS ] 
-            self.title          = splitted[ self.const._TITLE_POS ]         
-            self.date           = splitted[ self.const._DATE_POS ]            
+            self.project        = splitted[ self._PROJECT_POS ]             
+            self.source_file    = splitted[ self._SOURCE_FILE_POS ]     
+            self.dummy1         = splitted[ self._DUMMY1_POS ]         
+            self.resource_type  = splitted[ self._RESOURCE_TYPE_POS ] 
+            self.gid            = splitted[ self._GID_POS ]             
+            # Split key id
+            self.keyid = self.gid[:6]
+            self.gid = self.gid[6:]
+            
+            self.lid            = splitted[ self._LID_POS ]             
+            self.helpid         = splitted[ self._HELPID_POS ]         
+            self.platform       = splitted[ self._PLATFORM_POS ]         
+            self.dummy2         = splitted[ self._DUMMY2_POS ]         
+            self.langid         = splitted[ self._LANGID_POS ]         
+            self.text           = splitted[ self._TEXT_POS ]             
+            self.helptext       = splitted[ self._HELPTEXT_POS ]         
+            self.quickhelptext  = splitted[ self._QUICKHELPTEXT_POS ] 
+            self.title          = splitted[ self._TITLE_POS ]         
+            self.date           = splitted[ self._DATE_POS ]            
 
     def get_file_id(self):
         return self.project + "\\" + self.source_file
             self.gid, "\t", self.lid, "\t", self.helpid, "\t", self.platform, "\t", self.dummy2, "\t" , self.langid, 
             "\t", self.text, "\t", self.helptext, "\t", self.quickhelptext, "\t" , self.title, "\t", self.date ])
     
+    def get_id2(self):
+        """Return the identifier of this entity without the language id.
+
+        Concatenation of project, gid, lid, source_file, resource_type,
+        platform and helpid.
+        """
+        parts = (self.project, self.gid, self.lid, self.source_file,
+                 self.resource_type, self.platform, self.helpid)
+        return ''.join(parts)
+                 
     def get_id(self):
         return ''.join([self.project, self.gid, self.lid, self.source_file, self.resource_type, self.platform, self.helpid, self.langid])

File l10ntools/scripts/tool/sdf2.py

 def __make_id(text):
     text = text.replace("\t"," ")
     #print "calc keyid over "+text
-    if(text == "scp2 source\\ooo\\directory_ooo.ulf LngText STR_DIR_KAPITEL   "): 
-        return "keyid1"
+    #if(text == "scp2 source\\ooo\\directory_ooo.ulf LngText STR_DIR_KAPITEL   "): 
+    #    return "keyid1"
     hash = zlib.crc32(text) & 0xffffffff
     return __make_number(hash)
 
 def __make_number(h):
-    #                   1         2         3         4
-    #          1234567890123456789012345678901234567890
-    symbols = "0123456789abcdefghijklmnopqrstuvwxyz+-[=]ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    #                   1         2         3         4         5         6         7         8
+    #          12345678901234567890123456789012345678901234567890123456789012345678901234567890
+    symbols = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHI"
     order = len(symbols)
     result = ""
     while( len(result) < 6 ):
 
     return sdfident
 
+def convert_sdfentity_to_sdf2data(line):
+	c_data = Sdf2DataEntity()
+        #c_data.keyid = calc_keyid(line.project+line.source_file+line.lid+line.gid+line.platform+line.resource_type)         
+        #c_data.source_hash = calc_hash(source_line.text+source_line.helptext+source_line.quickhelptext+source_line.title)
+        c_data.lang = line.lang          
+        c_data.text = line.text   
+        c_data.helptext = line.helptext       
+        c_data.quickhelptext = line.quickhelptext  
+        c_data.title = line.title          
+ 	return c_data
+
 # PRE: sdf_list,sdf_source_list are lists of lang sdf_entities
 def convert_sdf_to_sdf2Data(sdf_source_list, sdf_list):
     sdfdata = list()
     return sdfdata
 
 
+def convert_sdf2dataentity_to_sdf(c_ident, c_data):
+    """Build an sdf entity from an ident entity and its translated data.
+
+    Language and texts come from c_data, the location fields from c_ident.
+    The gid of the result is the key id and the original gid joined by ":".
+    """
+    sdfl = sdf.SdfEntity()
+
+    # (attribute on the sdf entity, attribute on the data entity)
+    data_fields = (("langid", "lang"), ("text", "text"),
+                   ("helptext", "helptext"),
+                   ("quickhelptext", "quickhelptext"), ("title", "title"))
+    for target, source in data_fields:
+        setattr(sdfl, target, getattr(c_data, source))
+
+    for name in ("project", "source_file", "resource_type"):
+        setattr(sdfl, name, getattr(c_ident, name))
+
+    sdfl.gid = ":".join([c_ident.keyid, c_ident.gid])
+    sdfl.lid = c_ident.lid
+    sdfl.platform = c_ident.sid
+
+    return sdfl
+
 def convert_sdf2data_to_sdf(sdf2_ident_list, sdf2_data_list):
     sdf_list = list()
 
     for c_ident,c_data in zip(sdf2_ident_list,sdf2_data_list):
-        sdfl = sdf.SdfEntity()
-        sdfl.langid = c_data.lang 
-        sdfl.text = c_data.text 
-        sdfl.helptext = c_data.helptext
-        sdfl.quickhelptext = c_data.quickhelptext
-        sdfl.title = c_data.title
-        sdfl.project = c_ident.project 
-        sdfl.source_file = c_ident.source_file
-        sdfl.resource_type = c_ident.resource_type 
-        sdfl.gid = c_ident.gid
-        sdfl.lid = c_ident.lid 
-        sdfl.platform = c_ident.sid 
+        sdfl = convert_sdf2dataentity_to_sdf(c_ident, c_data)
         sdf_list.append(sdfl)
     
     return sdf_list
 
+
+def convert_sdf2idententity_to_sdf(c_ident):
+    """Build an sdf entity that carries the source strings of an ident entity.
+
+    The gid of the result is the key id and the original gid joined by ":".
+    """
+    sdfl = sdf.SdfEntity()
+    sdfl.project = c_ident.project 
+    sdfl.source_file = c_ident.source_file
+    sdfl.resource_type = c_ident.resource_type 
+    sdfl.gid = c_ident.keyid + ":" + c_ident.gid
+    sdfl.lid = c_ident.lid 
+    # SdfEntity keeps these values in "platform" and "langid", which is
+    # also what convert_sdf2dataentity_to_sdf fills.  "sid" and "lang" are
+    # still set for callers that read them from the result.
+    sdfl.platform = c_ident.sid
+    sdfl.langid = c_ident.lang
+    sdfl.sid = c_ident.sid 
+    sdfl.lang = c_ident.lang 
+    sdfl.text = c_ident.source_text
+    sdfl.helptext = c_ident.source_helptext 
+    sdfl.quickhelptext = c_ident.source_quickhelptext 
+    sdfl.title = c_ident.source_title
+    return sdfl
+
+
 def convert_sdf2ident_to_sdf(sdf2_ident_list):
     sdf_list = list()
 
     for c_ident in sdf2_ident_list:
-        sdfl = sdf.SdfEntity()
-        sdfl.project = c_ident.project 
-        sdfl.source_file = c_ident.source_file
-        sdfl.resource_type = c_ident.resource_type 
-        sdfl.gid = c_ident.gid
-        sdfl.lid = c_ident.lid 
-        sdfl.sid = c_ident.sid 
-        sdfl.lang = c_ident.lang 
-        sdfl.text = c_ident.source_text
-        sdfl.helptext = c_ident.source_helptext 
-        sdfl.quickhelptext = c_ident.source_quickhelptext 
-        sdfl.title = c_ident.source_title
+        sdfl = convert_sdf2idententity_to_sdf(c_ident)
         sdf_list.append(sdfl)
-
     return sdf_list   
 
 
         self.source_helptext       = source_helptext
         self.source_quickhelptext  = source_quickhelptext
         self.source_title          = source_title
+        self.l10n_status = ""
 
     def set_properties(self, line):
         splitted = line.split("\t")
             self.source_quickhelptext = splitted[self.SOURCE_QUICKHELPTEXT_POS]         
             self.source_title   = splitted[self.SOURCE_TITLE_POS]         
     
+    def set_l10n_status(self, l10n_status):
+        """Store l10n_status on this entity (initially the empty string)."""
+        self.l10n_status = l10n_status
+    
+    def get_l10n_status(self):
+        """Return the stored l10n status ("" if none has been set)."""
+        return self.l10n_status
+
     def __str__(self):
         return ''.join([self.project, "\t", self.source_file, "\t", self.resource_type, "\t", self.gid, "\t",  
             self.lid, "\t", self.sid, "\t", self.lang, "\t", self.keyid, "\t", self.source_hash, "\t",  

File l10ntools/scripts/tool/sdf_raw.py

+#*************************************************************************
+#
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+# 
+# Copyright 2000, 2010 Oracle and/or its affiliates.
+#
+# OpenOffice.org - a multi-platform office productivity suite
+#
+# This file is part of OpenOffice.org.
+#
+# OpenOffice.org is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License version 3
+# only, as published by the Free Software Foundation.
+#
+# OpenOffice.org is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License version 3 for more details
+# (a copy is included in the LICENSE file that accompanied this code).
+#
+# You should have received a copy of the GNU Lesser General Public License
+# version 3 along with OpenOffice.org.  If not, see
+# <http://www.openoffice.org/license.html>
+# for a copy of the LGPLv3 License.
+#
+#*************************************************************************
+
+from pseudo import PseudoSet,PseudoOrderedDict
+from time import gmtime, strftime
+
+class SdfData:
+    """In-memory representation of an sdf file.
+
+    Entities are stored in a PseudoOrderedDict keyed by SdfEntity.get_id().
+    """
+    _filename        = ""
+    _dict            = PseudoOrderedDict()
+    _languages_found = []
+    
+    def __init__ (self, filename=""):
+        self._filename = filename
+        # Per-instance containers; the class attributes above are only
+        # defaults and must not be shared between instances.
+        self._dict = PseudoOrderedDict()
+        self._languages_found = []
+
+    def __getitem__(self, key):
+        """Return the entity stored under key, or None if there is none."""
+        if self._dict.has_key(key):
+            return self._dict[key]
+        else:
+            return None
+    
+    def has_key(self, key):
+        return self._dict.has_key(key)
+    
+    def __setitem__(self, key, value):
+        self._dict[key] = value
+    
+    def get_languages_found_in_sdf(self):
+        """Return the language ids seen by read() as a PseudoSet."""
+        return PseudoSet(self._languages_found)
+
+    def read(self):
+        """Parse the file given to the constructor.
+
+        Prints an error and re-raises the IOError if reading fails.
+        """
+        try:
+            f = open(self._filename, "r")
+            try:
+                lines = [line.rstrip('\n') for line in f.readlines()] 
+            finally:
+                # Close the file even when reading fails half way.
+                f.close()
+        except IOError:
+            print("ERROR: Trying to read "+ self._filename)
+            raise
+        for line in lines:        
+            entity = SdfEntity()
+            entity.set_properties(line)
+            self._dict[entity.get_id()] = entity
+            self._languages_found.append(entity.langid)
+
+    def write(self, filename):
+        """Write all stored entities to filename, one per line.
+
+        Prints an error and re-raises the IOError if writing fails.
+        """
+        try:
+            f = open(filename, "w+") 
+            try:
+                for value in self._dict.itervalues():
+                    # The values are entity objects and cannot be
+                    # concatenated with a str directly.
+                    # NOTE(review): relies on SdfEntity providing __str__.
+                    f.write(str(value) + "\n")
+            finally:
+                f.close()
+        except IOError:
+            print("ERROR: Trying to write " + filename)
+            raise
+
+import sys
+class SdfEntity: 
+    # Sdf format columns
+   
+    #import const
+    _PROJECT_POS         = 0
+    _SOURCE_FILE_POS     = 1
+    _DUMMY1_POS          = 2
+    _RESOURCE_TYPE_POS   = 3
+    _GID_POS             = 4
+    _LID_POS             = 5
+    _HELPID_POS          = 6
+    _PLATFORM_POS        = 7
+    _DUMMY2_POS          = 8
+    _LANGID_POS          = 9
+    _TEXT_POS            = 10
+    _HELPTEXT_POS        = 11
+    _QUICKHELPTEXT_POS   = 12
+    _TITLE_POS           = 13
+    _DATE_POS            = 14
+        
+    def __init__(self, project="", source_file="", dummy1="0", resource_type="", gid="", lid="", helpid="", platform="", dummy2="0", langid="", 
+                       text="", helptext="", quickhelptext="", title="", date=""):
+        """Create an entity from the given column values.
+
+        keyid starts out empty.  If date is the empty string, the current
+        UTC time formatted as "YYYY-MM-DD HH:MM:SS" is used.
+        """
+        # location of the string
+        self.project, self.source_file = project, source_file
+        self.dummy1, self.resource_type = dummy1, resource_type
+        self.gid, self.lid, self.helpid = gid, lid, helpid
+        self.platform, self.dummy2 = platform, dummy2
+        # language and content
+        self.langid = langid
+        self.text, self.helptext = text, helptext
+        self.quickhelptext, self.title = quickhelptext, title
+        self.keyid = ""
+
+        if date == "":
+            date = strftime("%Y-%m-%d %H:%M:%S",gmtime())
+        self.date = date
+
+
+    def set_properties(self, line):
+        splitted = line.split("\t")
+        if len(splitted) == 15:
+            self.project        = splitted[ self._PROJECT_POS ]             
+            self.source_file    = splitted[ self._SOURCE_FILE_POS ]     
+            self.dummy1         = splitted[ self._DUMMY1_POS ]         
+            self.resource_type  = splitted[ self._RESOURCE_TYPE_POS ] 
+            self.gid            = splitted[ self._GID_POS ]             
+            self.lid            = splitted[ self._LID_POS ]             
+            self.helpid         = splitted[ self._HELPID_POS ]         
+            self.platform       = splitted[ self._PLATFORM_POS ]         
+            self.dummy2         = splitted[ self._DUMMY2_POS ]         
+            self.langid         = splitted[ self._LANGID_POS ]         
+            self.text           = splitted[ self._TEXT_POS ]             
+            self.helptext       = splitted[ self._HELPTEXT_POS ]         
+            self.quickhelptext  = splitted[ self._QUICKHELPTEXT_POS ] 
+            self.title          = splitted[ self._TITLE_POS ]         
+            self.date           = splitted[ self._DATE_POS ]            
+
+    def get_file_id(self):
+        return self.project + "\\" + self.source_file
+    
+    def get_resource_path(self):
+            return self.source_file[0:self.source_file.rfind( "\\" )-1]
+    
+    def __str__(self):
+        return ''.join([self.project, "\t", self.source_file, "\t", self.dummy1, "\t", self.resource_type, "\t" , 
+            self.gid, "\t", self.lid, "\t", self.helpid, "\t", self.platform, "\t", self.dummy2, "\t" , self.langid, 
+            "\t", self.text, "\t", self.helptext, "\t", self.quickhelptext, "\t" , self.title, "\t", self.date ])
+    
+    def get_id2(self):
+        return ''.join([self.project, self.gid, self.lid, self.source_file, self.resource_type, self.platform, self.helpid])   
+                 
+    def get_id(self):
+        return ''.join([self.project, self.gid, self.lid, self.source_file, self.resource_type, self.platform, self.helpid, self.langid])

File l10ntools/scripts/tool/sdffile.py

-import sys, sdf2, heapq
+import os, shutil, sys, sdf2, tempfile
 
 class IdentFile:
     
     def __init__(self):
-        self.lines = list()
+        #self.lines = list()
         self.ident = dict()
         self.used_keyids = dict()
         
             ie.set_properties(line)
             self.ident[ie.get_identifier()] = ie
             self.used_keyids[ie.keyid] = "1"
-            self.lines.append(ie)
+            #self.lines.append(ie)
             
     def write(self, file):
         try:
-            fh = open(file, "w")
+            fd, tmpfile = tempfile.mkstemp()
+	    fh = os.fdopen(fd, "w")
             mylist = list()
             self.rehash()
             for line in self.ident.values():
             mylist.sort(key=str.lower)
             for line in mylist:
                 fh.write(line+"\n")
+		#print line+"\n"
+ 	    fh.close()
+	    shutil.move(tmpfile, file)
         except IOError:
             print "Can not write to file "+file
             sys.exit(-1)
 
+    @staticmethod
+    def make_sdf2idententity(sdfentity):
+        """Build an sdf2.Sdf2IdentEntity from the fields of an sdf entity.
+
+        Declared as a static method because it takes no instance: the only
+        argument is the sdf entity to convert.  The entity's platform is
+        copied to sid and its langid to lang; source_hash is computed over
+        text + helptext + quickhelptext + title.
+        """
+        c_ident = sdf2.Sdf2IdentEntity()
+        c_ident.project = sdfentity.project
+        c_ident.source_file = sdfentity.source_file
+        c_ident.resource_type = sdfentity.resource_type
+        c_ident.gid = sdfentity.gid
+        c_ident.lid = sdfentity.lid
+        c_ident.sid = sdfentity.platform
+        c_ident.lang = sdfentity.langid
+        # NOTE: keyid is not assigned here.
+        c_ident.source_hash = sdf2.calc_hash(sdfentity.text+sdfentity.helptext+sdfentity.quickhelptext+sdfentity.title)
+        c_ident.source_text = sdfentity.text
+        c_ident.source_helptext = sdfentity.helptext
+        c_ident.source_quickhelptext = sdfentity.quickhelptext
+        c_ident.source_title = sdfentity.title
+        return c_ident
+
     def add_sdf(self, line):
         c_ident = sdf2.Sdf2IdentEntity()
         c_ident.project = line.project
         c_ident.source_helptext = line.helptext       
         c_ident.source_quickhelptext = line.quickhelptext  
         c_ident.source_title = line.title   
-        return self.add(c_ident)
+        return self.add(c_ident), c_ident
 
     # line is a Sdf2IdentEntity
     def add(self, line):
         if self.ident.has_key(line.get_identifier()):
             ref_line = self.ident[line.get_identifier()]
             keyid = ref_line.keyid
-            status = "OLD"
+            status = "UNCHANGED"
             ref_l10n = ref_line.source_text+ref_line.source_helptext+ref_line.source_quickhelptext+ref_line.source_title
             new_l10n = line.source_text+line.source_helptext+line.source_quickhelptext+line.source_title
             if not ref_l10n == new_l10n:
                 status = "CHANGED"
+                ref_line.set_l10n_status(status)
         else:
             # new string
-            #line.source_hash = sdf2.calc_hash(line.source_text+line.source_helptext+line.source_quickhelptext+line.source_title)
             status = "NEW"
-            candidate = self.detect_moved(self.lines, line)
-            if not candidate == None:
-                print "Move detected: "+str(line)
-                print "Candidate:     "+str(candidate)
-                keyid = candidate.keyid
-                del self.ident[candidate.get_identifier()]
-                status = "MOVED"
-            else:
-                keyid = sdf2.calc_keyid(line.project+line.source_file+line.lid+line.gid+line.sid+line.resource_type)
-                cnt = 0
-                while(keyid in self.used_keyids):
-                    cnt = cnt + 1
-                    keyid = sdf2.calc_keyid(str(line.project)+str(line.source_file)+str(line.lid)+str(line.gid)+str(line.sid)+str(line.resource_type)+str(cnt))
+            keyid = sdf2.calc_keyid(line.project+line.source_file+line.lid+line.gid+line.sid+line.resource_type)
+            cnt = 0
+            while(keyid in self.used_keyids):
+                cnt = cnt + 1
+                keyid = sdf2.calc_keyid(str(line.project)+str(line.source_file)+str(line.lid)+str(line.gid)+str(line.sid)+str(line.resource_type)+str(cnt))
         line.keyid = keyid
         self.used_keyids[keyid] = 1
+        line.set_l10n_status(status)
         self.ident[line.get_identifier()] = line
         return status
 
-    def detect_moved(self, ident, line):
-        # filename, gid,lid,sid,hash
-        pq = []
-        
-        for ident_line in ident:
-            val = 32
-            if ident_line.source_hash == line.source_hash:
-                if ident_line.project == line.project:
-                    val /= 2
-                if ident_line.source_file == line.source_file:
-                    val /= 2
-                if ident_line.gid == line.gid:
-                    val /= 2
-                if ident_line.lid == line.lid:
-                    val /= 2
-                if ident_line.sid == line.sid:
-                    val /= 2
-                heapq.heappush(pq, (val, ident_line))
-        if len(pq) > 0:
-            val, candidate = heapq.heappop(pq)
-            # At least three equal attributes found! 
-            if val <= 4:
-                return candidate
-            else:
-                return None
-        else:
-            return None
-
     def rehash(self):
         for line in self.ident.values():
             line.source_hash = sdf2.calc_hash(line.source_text+line.source_helptext+line.source_quickhelptext+line.source_title)