Commits

torotoki committed dfeaece

Release restore_tgr.py and convert.py for PB

  • Participants
  • Parent commits 1de8dc9

Comments (0)

Files changed (71)

File bin/convert.py

 # -*- coding: utf-8 -*-
 
+# This file is a fork of the convert.py used for OC.
+# Although it can probably still parse OC,
+# it has not been tested on OC.
+
+
 __version__ = '1.2'
 
 import re
 from glob import glob
 from copy import copy
 from env_cabocha import envCaboCha
-# import CaboCha
 
 def pp(obj):
     # Debug helper: decode \uXXXX escapes in the repr of obj
     return re.sub(r"\\u([0-9a-f]{4})", lambda x: unichr(int("0x"+x.group(1),
                                                             16)), str(obj))
 
-class Extracted():
+class TgrTags:
+    def __init__(self):
+        # s_dict is a DefaultOrderedDict whose keys are the start positions of words;
+        # e_dict is the same, keyed by the end positions.
+        # A tag is a list of information about one annotation.
+
+        self.s_dict = DefaultOrderedDict(lambda: DefaultOrderedDict(list))
+        self.e_dict = DefaultOrderedDict(lambda: DefaultOrderedDict(list))
+
+    def set_tag(self, start_line, start_position, end_line, end_position, tag):
+        self.s_dict[start_line][start_position].append(tag)
+        self.e_dict[end_line][end_position].append(tag)
+
+    def get_by_start(self, start_line, start_pos):
+        return self.s_dict[start_line][start_pos]
+
+    def get_by_end(self, end_line, end_pos):
+        return self.e_dict[end_line][end_pos]
+
+    def get_by_offset(self, start_line, start_pos, end_line, end_pos):
+        got_tags = []
+        starts = self.get_by_start(start_line, start_pos)
+        ends   = self.get_by_end(end_line, end_pos)
+
+        # Judge the head: predicates (述語) are matched by start position,
+        # other tags by end position.
+        for tag in starts:
+            refer_tag = tag[0]  # such as ガ, ヲ, ニ
+            if refer_tag == u'述語':
+                got_tags.append(tag)
+
+        for tag in ends:
+            refer_tag = tag[0]
+            if refer_tag != u'述語':
+                got_tags.append(tag)
+
+        return got_tags
+
+    def words(self):
+        return self.e_dict
+
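+# A minimal usage sketch of TgrTags (hypothetical values; a tag is a list
+# [tag name, word, link info, range] as built in Extracted.set_tags below):
+#   tags = TgrTags()
+#   tags.set_tag(3, 0, 3, 5, [u'述語', u'考える', {'id': '1'}, [3.0, 3.5]])
+#   tags.get_by_offset(3, 0, 3, 5)  # -> the 述語 tag, matched by start position
+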
+# Now using a DefaultOrderedDict(dict) keyed by the end position of words instead of this class
+# class BCCWJWords:
+#     def __init__(self):
+#         # s_dict is a DefaultOrderedDict whose keys are the start positions of words;
+#         # e_dict is the same, keyed by the end positions.
+#         # A morph is the information stored for one word.
+#         self.s_dict = DefaultOrderedDict(dict)
+#         self.e_dict = DefaultOrderedDict(dict)
+
+#     def set_word(self, start_line, start_position, end_line, end_position, morph):
+#         self.s_dict[start_line][start_position] = morph
+#         self.e_dict[end_line][end_position] = morph
+
+
+#     def get_by_start(start_line, start_pos):
+#         return self.s_dict[start_line][start_pos]
+
+#     def get_by_end(end_line, end_pos):
+#         return self.e_dict[end_line][end_pos]
+
+
+class Extracted:
     def __init__(self, id):
         self.id = id
         self.last = 1
         self.prev_line_end = 1
         self.number = {}
         self.contents = []
-        self.tags = DefaultOrderedDict(list)
+        # self.tags = DefaultOrderedDict(list)
+        self.tgr_tags = TgrTags()
         self.morph = DefaultOrderedDict(dict)
 
         # For debug
     def set_contents(self, c):
         self.contents = c.split('\n')
 
+    def convert(self, node, numtrans=False):
+        """ Convert morphlogical information of SUW to dict. """
+
+        def tagged_char(element):
+            if element.tagName == 'sampling':
+                return ""
+            elif element.tagName == 'ruby':
+                return element.childNodes[0].data
+            elif element.tagName == 'correction':
+                if element.childNodes:
+                    return element.childNodes[0].data
+                else:
+                    return ""
+            elif element.tagName == 'enclosedCharacter':
+                return element.childNodes[0].data
+            else:
+                print >>stderr, "Found the unknown tag in the position of tagged charachter:", element.tagName
+
+        morph = {k:v.nodeValue for k,v in dict(node.attributes).iteritems()}
+        if not numtrans:
+            # the general case
+            morph['word'] = ""
+            for elem_or_text in node.childNodes:
+                # Try to treat the child as a tagged element
+                try:
+                    morph['word'] += tagged_char(elem_or_text)
+                except AttributeError:
+                    morph['word'] += elem_or_text.data
+        else:
+            # If the node is in a NumTrans tag
+            morph['word'] = node.getAttribute('originalText')
+
+        morph['start'] = int(node.getAttribute('start'))/10 - self.prev_line_end
+        morph['end'] = int(node.getAttribute('end'))/10 - self.prev_line_end
+        self.last = int(node.getAttribute('end'))/10
+
+        if not morph['word']:
+            pass  # breakpoint anchor: the word can be empty here
+        return morph
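+    # A sketch of the dict convert() returns (values are illustrative; most
+    # keys come directly from the SUW node's XML attributes):
+    #   {'word': u'...', 'start': 0, 'end': 2, 'lemma': u'...', ...}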
+
+
+    def add_line(self):
+        """ Add new line to the buffer """
+        self.prev_line_end = self.last
+        self.current_line += 1
+
+    def restore_fraction(self, fraction_node):
+        """ Restore fraction node """
+
+        skip_next = False
+        for e,s in enumerate(fraction_node.getElementsByTagName('SUW')):
+            # Skip current node for the fraction.
+            if skip_next: skip_next = False; continue
+            if s.parentNode.tagName == 'NumTrans':
+                text = s.getAttribute('originalText')
+                if text == '/':
+
+                    slash_morph = self.convert(s)
+                    # Reverse the numerator and the denominator
+                    nnode = fraction_node.getElementsByTagName('SUW')[e+1]
+                    nnode_morph = self.convert(nnode)
+                    if not nnode_morph: print self.id; continue
+                    self.last = nnode_morph['end']
+                    # XXX: Can obtain the correct text with following sequence:
+                    # 1. Previous words
+                    # 2. Numerator
+                    # 3. Fraction slash
+                    # 4. Denominator
+                    current_line_morphs = \
+                                self.morph[self.current_line].values()
+                    fraction_prevs = dict([(i['end'],i) \
+                                        for i in current_line_morphs[:-1]])
+                    fraction_next = dict([(i['end'],i) \
+                                        for i in current_line_morphs[-1:]])
+
+                    # Swap the positions of the numerator and the denominator
+
+                    # Numerator
+                    numer_key   = fraction_next.keys()[-1]
+                    numer_value = copy(nnode_morph)
+                    numer_value['start'] = fraction_next[numer_key]['start']
+                    numer_value['end'] = fraction_next[numer_key]['end']
+                    # denominator = [(next_key, next_value)]
+
+                    # slash
+                    slash_position = slash_morph['end']
+
+                    # denominator
+                    denom_position = nnode_morph['end']
+                    denom_value    = copy(fraction_next.values()[-1])
+                    denom_value['start'] = nnode_morph['start']
+                    denom_value['end'] = nnode_morph['end']
+
+                    # key = self.morph[self.current_line].keys()[0]
+                    self.morph[self.current_line] = dict(
+                            fraction_prevs.items() + \
+                            {numer_key: numer_value,  # numerator
+                             slash_position: slash_morph, # slash
+                            denom_position: denom_value  # denominator
+                            }.items()
+                    )
+                    prev_end = nnode.getAttribute('end')
+                    skip_next = True  # Next node is already obtained.
+                else:
+                    morph = self.convert(s)
+                    if morph:
+                        self.morph[self.current_line][morph['end']] = morph
+            else:
+                morph = self.convert(s)
+                if morph:
+                    self.morph[self.current_line][morph['end']] = morph
+
+    def restore_suw(self, suw):
+        """ Restore original words from luw of tgr format """
+        # TODO: define new function for fraction
+        if suw.tagName == 'fraction':
+            self.restore_fraction(suw)
+        elif suw.tagName == 'NumTrans':
+            for suw_in_numtrans in suw.getElementsByTagName('SUW'):
+                morph = self.convert(suw_in_numtrans, numtrans=True)
+                # Looks like sometimes morph doesn't even exist
+                # (komachi 2013-01-19)
+                if morph:
+                    self.morph[self.current_line][morph['end']] = morph
+                    self.current_last = morph['end']
+
+        elif suw.tagName == 'webBr':
+            self.add_line()
+        elif suw.tagName == 'SUW':
+            morph = self.convert(suw)
+            if morph:
+                self.morph[self.current_line][morph['end']] = morph
+                self.current_last = morph['end']
+        elif suw.tagName == 'LUW':
+            # Nested LUW; not used in OC
+            nested_luw = suw
+            for suw in nested_luw.childNodes:
+                self.restore_suw(suw)
+
+        elif suw.tagName == 'sampling':
+            pass
+        else:
+            print >>stderr, "found unknown tag name on SUW position:", suw.tagName
+
+    def restore_luw(self, luw, sentence_node):
+        nsib = luw.nextSibling
+        psib = luw.previousSibling
+        if luw.tagName == 'webBr':
+            if psib != None and psib.tagName != 'webBr' and nsib != None:
+                self.add_line()
+        elif luw.tagName == 'quote':
+            quote = luw
+            for quote_luw in quote.childNodes:
+                self.restore_luw(quote_luw, quote)
+        elif luw.tagName == 'sampling':
+            pass
+        # TODO: handle noteMarker tags
+        elif luw.tagName != 'LUW':
+            print >>stderr, "Unknown tag name on LUW position:", luw.tagName
+
+        for suw in luw.childNodes:
+            self.restore_suw(suw)
+
+        # article = luw.parentNode.parentNode.parentNode
+        # If this is the last element, insert a newline; judged by some heuristics.
+        sent = sentence_node  # alias
+        if luw.parentNode.lastChild == luw and \
+           luw.parentNode.tagName != "quote" and \
+            (luw.parentNode.nextSibling.nextSibling == None
+             or luw.parentNode.nextSibling.nextSibling.tagName != "LUW") and \
+            (sent.getAttribute('type') != 'fragment'
+             or sent.nextSibling.nextSibling == None):
+            self.add_line()
+            # Do not use ac_flag.
+            # ac_flag = "QCAnswer"
+
+
+    def restore_sentence(self, sentence_node):
+        """
+        Restore original sentence from tgr format
+
+        """
+
+        for luw in sentence_node.childNodes:
+            self.restore_luw(luw, sentence_node)
+
     def set_morph(self, xml):
+        # fraction_id = 0  # Used  instead of start position of fraction slash
 
-        def convert(node):
-            """ Convert morphlogical information of SUW to dict. """
+        # for article in xml.getElementsByTagName('article'):
+        #     if article.getAttribute('articleID').endswith('-Answer'):
+        #         add_line()
+        #     for sent in article.getElementsByTagName('sentence'):
+        #         self.restore_sentence(sent)
 
-            try:
-                morph = {k:v.nodeValue for k,v in dict(node.attributes).iteritems()}
-                morph['word'] = node.getAttribute('originalText') or \
-                                node.childNodes[0].data
-                morph['start'] = int(node.getAttribute('start'))/10 - self.prev_line_end
-                morph['end'] = int(node.getAttribute('end'))/10 - self.prev_line_end
-                self.last = int(node.getAttribute('end'))/10
-                return morph
-            except: print "RERERE"  # TODO: Check the fraction SUW
+        def check_sampling(sentence):
+            # Returns "start", "end", or an empty string
+            sampling = sentence.getElementsByTagName("sampling")
+            if sampling:
+                start_or_end = sampling[0].getAttribute("type")
+                if start_or_end == "start":
+                    return "start"
+                elif start_or_end == "end":
+                    return "end"
+            else:
+                return ""
 
+        sampling_flag = False
+        sampling_end_flag = False
+        if self.id.endswith("m_0"):
+            article = xml.getElementsByTagName("article")
+            contents = article
+        else:
+            contents = xml.getElementsByTagName("div")
+        for each_ad in contents:
+            if each_ad.getAttribute('articleID').endswith('Answer'):
+                self.add_line()
+            for sent in each_ad.getElementsByTagName('sentence'):
+                if self.id.endswith("m_0") and sent.parentNode.tagName != "div":  # is the div branch unnecessary?
+                    _sampling_flag = check_sampling(sent)
+                    if _sampling_flag == "start":
+                        sampling_flag = True
+                    elif _sampling_flag == "end":
+                        sampling_flag = False
+                        sampling_end_flag = True
 
-        def add_line():
-            """ Add buffer new line. """
-            self.prev_line_end = self.last
-            self.current_line += 1
+                    if sampling_flag or sampling_end_flag:
+                        sampling_end_flag = False
+                        continue
 
-        fraction_id = 0  # Used instead of start position of fraction slash
-        for article in xml.getElementsByTagName('article'):
-            if article.getAttribute('articleID').endswith('-Answer'):
-                add_line()
-            for sent in article.getElementsByTagName('sentence'):
-                if sent.parentNode.tagName == 'quote' or \
-                   sent.parentNode.tagName == 'quotation':
+                if sent.parentNode.tagName == "quotation":
                     continue
-                for luw in sent.childNodes:
-                    nsib = luw.nextSibling
-                    psib = luw.previousSibling
-
-                    if luw.tagName == 'webBr':
-                        if psib != None and psib.tagName != 'webBr' and nsib != None:
-                            add_line()
-                    elif luw.tagName != 'LUW':
-                        print >>stderr, "Unknown tag name on LUW position:", luw.tagName
-                    # article = luw.parentNode.parentNode.parentNode
-
-                    for suw in luw.childNodes:
-                        if suw.tagName == 'fraction':
-                            skip_next = False
-                            for e,s in enumerate(suw.getElementsByTagName('SUW')):
-                                # Skip current node for fraction.
-                                if skip_next: skip_next = False; continue
-                                if s.parentNode.tagName == 'NumTrans':
-                                    text = s.getAttribute('originalText')
-                                    if text == '/':
-                                        node = convert(s)
-                                        # Reverse numerator and denominator
-                                        nnode = suw.getElementsByTagName('SUW')[e+1]
-                                        nnode_morph = convert(nnode)
-                                        if not nnode_morph: print self.id;continue
-                                        self.last = nnode_morph['end']
-                                        # XXX: Can obtain correct sequence text.
-                                        # 1. Previous words
-                                        # 2. Numerator
-                                        # 3. Fraction slash
-                                        # 4. Denominator
-                                        current_line_morphs = \
-                                                    self.morph[self.current_line].values()
-                                        fraction_prevs = dict([(i['end'],i) \
-                                                            for i in current_line_morphs[:-1]])
-                                        fraction_next = dict([(i['end'],i) \
-                                                            for i in current_line_morphs[-1:]])
-
-                                        # change position Numerator and Denominator
-
-                                        # Numerator
-                                        numer_key   = fraction_next.keys()[-1]
-                                        numer_value = copy(nnode_morph)
-                                        numer_value['start'] = fraction_next[numer_key]['start']
-                                        numer_value['end'] = fraction_next[numer_key]['end']
-                                        # denominator = [(next_key, next_value)]
-
-                                        # Slash
-                                        slash_position = node['end']
-                                        slash_morph = node
-
-                                        # Denominator
-                                        denom_position = nnode_morph['end']
-                                        denom_value    = copy(fraction_next.values()[-1])
-                                        denom_value['start'] = nnode_morph['start']
-                                        denom_value['end'] = nnode_morph['end']
-
-                                        # key = self.morph[self.current_line].keys()[0]
-                                        self.morph[self.current_line] = dict(
-                                                fraction_prevs.items() + \
-                                                {numer_key: numer_value,  # numerator
-                                                 slash_position: slash_morph, # slash
-                                                 denom_position: denom_value  # denominator
-                                                }.items()
-                                        )
-                                        prev_end = nnode.getAttribute('end')
-                                        skip_next = True  # Next node is already contained.
-                                    else:
-                                        morph = convert(s)
-                                        if morph:
-                                            self.morph[self.current_line][morph['end']] = morph
-                                else:
-                                    morph = convert(s)
-                                    if morph:
-                                        self.morph[self.current_line][morph['end']] = morph
-                        elif suw.tagName == 'NumTrans':
-                            for e,suw_in_numtrans in \
-                              enumerate(suw.getElementsByTagName('SUW')):
-                                morph = convert(suw_in_numtrans)
-                                # Looks like sometimes morph doesn't even exist
-                                # (komachi 2013-01-19)
-                                if morph:
-                                    self.morph[self.current_line][morph['end']] = morph
-                                    self.current_last = morph['end']
-                        elif suw.tagName == 'webBr':
-                            add_line()
-                        elif suw.tagName == 'SUW':
-                            morph = convert(suw)
-                            if morph:
-                                self.morph[self.current_line][morph['end']] = morph
-                                self.current_last = morph['end']
-                        else:
-                            print >>stderr, "found unknown tag name on SUW position:", suw.tagName
-
-                    # If last element, insert newline.
-                    if luw.parentNode.lastChild == luw and \
-                       luw.parentNode.tagName != "quote" and \
-                        (luw.parentNode.nextSibling.nextSibling == None or
-                         luw.parentNode.nextSibling.nextSibling.tagName != "LUW") and \
-                        (sent.getAttribute('type') != 'fragment' or \
-                         sent.nextSibling.nextSibling == None):
-                        add_line()
-                        # Do not use ac_flag.
-                        # ac_flag = "QCAnswer"
+                self.restore_sentence(sent)
 
     def set_tags(self, t):
+        # Process the tgr tags
+
         for e,i in enumerate(t.splitlines()):
             if not i or i.startswith('np'): continue
             tag = i.strip().split('\t')
             start[0] -= 1
             end[0]   -= 1
 
             id = '.'.join([str(i) for i in end])
+            info = []
 
-            # for k in [key for key, prev_range in self.tags.iteritems() \
-            #           if tag_range == prev_range[2]]:
-            match_key = ""
-            for key, prev_ranges in self.tags.iteritems():
-                for prev_range in prev_ranges:
-                    if tag_range == prev_range[3]:
-                        match_key = key
-            if match_key:
-                # If same id in self.tags
-                self.tags[match_key].append([])
-                info = self.tags[match_key][-1]
-            else:
-                # If new item
-                self.tags[id] = [[]]
-                info = self.tags[id][0]
             info.append(tag[0][:]) # tag name
             # info.append(tag_range) # range of tag
             word = ''
                 word = ''.join([self.contents[start[0]][start[1]:],
                                 self.contents[end[0]][:end[1]]])
             if not word:
-                print >>stderr, "Cannot obtain word at %d.%d ~ %d.%d in %s" % \
+                print >>stderr, "Cannot obtain the word at %d.%d ~ %d.%d in %s" % \
                     (start[0], start[1], end[0], end[1], self.id)
             info.append(word)  # word
+            manage = tag[3].split(';')  # id and link information
             if 'ln=' in tag[3]:
-                manage = tag[3].split(';')
                 # management info id(id) and reference to it(ln)
                 if manage[0].startswith('id='):
                     info.append({'id':re.sub('^id=', '', manage[0]),
                     info.append({'id':re.sub('^id=', '', manage[1]),
                                  'ln':re.sub('^ln=', '', manage[0])})
             else:
-                info.append({'id':re.sub('^id=', '', tag[3])[:-1]}) # management info id
+                info.append({'id':re.sub('^id=', '', manage[0])}) # management info id
             info.append(tag_range)
-            # self.tags[info[1][0]] = info
             self.number[id] = int(re.sub('id:', '', tag[1]))
 
+            self.tgr_tags.set_tag(start[0], start[1], end[0], end[1], info)
+
             # Error check to debug
-            self.starts.append(start)
-            self.ends.append(end)
+            # self.starts.append(start)
+            # self.ends.append(end)
 
     def check_morphs(self):
         for start, end in zip(self.starts, self.ends):
 
 
     def get_tags(self):
-        # sorting to make a sorted dictionary
-        # self.tags, for example {1(end line): {2(end position): [ ..word info... ] ...} ...}
-        def sorted_tags_key(dict_key):
-            (line, position) = dict_key.split('.')
-            position = position.zfill(4)
-            return float(line+'.'+position)
+        # # sorting to make the sorted dictionary
+        # # self.tags, for example {1(end line): {2(end position): [ ..word info... ] ...} ...}
+        # def sorted_tags_key(dict_key):
+        #     (line, position) = dict_key.split('.')
+        #     position = position.zfill(4)
+        #     return float(line+'.'+position)
 
-        tags = DefaultOrderedDict(dict)
-        for k,v in sorted(self.tags.iteritems(),
-                             key=lambda t: sorted_tags_key(t[0])):
-            tags[k] = v
-        return tags
+
+        # tags = DefaultOrderedDict(dict)
+        # for k,v in sorted(self.tags.iteritems(),
+        #                      key=lambda t: sorted_tags_key(t[0])):
+        #     tags[k] = v
+        # return tags
+
+        # This function now returns the TgrTags object, which is not a sorted dictionary
+        return self.tgr_tags
 
     def get_morph(self):
         # Sorting to make a sorted dictionary
 
 def tgr_parser(contents):
     """
-    tgr ファイルをそれぞれのコンテンツに分割し、
-    Extracted クラスのリストとして返す。
+    Split a tgr file into its contents and
+    return them as a list of Extracted objects.
     """
     id_pat  = re.compile(r"^<text id=(\w+)>$")
     contains = "normal"
         if re.search(r"m_[1-9]$", text_id):
             exit(-1)
         filename = re.sub('m_0', '', text_id)
-        xml = minidom.parse("%s/%s.xml" % (xml_folder, filename))
+        try:
+            xml = minidom.parse("%s/%s.xml" % (xml_folder, filename))
+        except IOError, e:
+            print >>stderr, "Cannot open the BCCWJ file %s.xml, so it is skipped: %s" % (filename, e)
+            continue
         extracted.set_morph(xml)
 
     return tgrs
         except AttributeError:
             return fail
 
-class tags:
+class Tags:
     def get_tags(self):
         return self.tags
 
     def set_tags(self, tag):
         self.tags = dict(tag.items() + self.tags.items())
 
-class JoinedTag(tags):
+class JoinedTag(Tags):
     def __init__(self, word, morph_info):
         self.word = word
         self.morph_info = morph_info
             output += "_"
         return output
 
-class AlreadyTags(tags):
+class AlreadyTags(Tags):
     """
     Tags already collected before a verb word.
-    It will merge JoinedTag when found verb word
+    They will be merged into a JoinedTag when the verb word is found
     """
     def __init__(self, tags=None):
         # Avoid a mutable default argument shared between instances.
         self.tags = tags if tags is not None else {}
             chunk_number = int(chunk[1])
             insert_chunk(key(), chunk, end_line, end_index)
         else:
-            # Try to access next chunk
+            # Try to get the next chunk
             # at a different position from the previous chunk
             if end_line == self.last_chunk_offset[0]:
                 p_end_position = self.last_chunk_offset[1]
 
     # def fix_double_word(self, word, previous_word):
     #     # If previous word is substring of current word,
-    #     # remove the part of substring in current word.
+    #     # remove that substring from the current word.
     #     # For example, previous word: 保育, current word: 保育園
     #     # After current word: 園
     #     if previous_word:
     #             # this code has issue:
     #             # Sentence: ...|の|男の子|...
     #             # If tagged 'の' and '男の子' in tgr,
-    #             # Wrong matching. prev: の this: 男の子
+    #             # it matches wrongly, e.g. prev: の this: 男の子
     #             print >>stderr, "Double words. Current word replace:", \
     #                 "prev:%s this:%s" % (p_word_name, word)
     #             word = re.sub(p_word_name, "", word)
                     self.result[prev_pred].set_tags({"alt": "causative"})
                 else:
                     # FIXME
-                    print >>stderr, "Cannot found pred for causative"
+                    print >>stderr, "Not found a pred for causative"
                 self.pword_causative_flag = True
 
             elif word_lemma in (u"れる", u"られる"):
                     if prev_pred:
                         self.result[prev_pred].set_tags({"alt": "passive"})
                     else:
-                        print >>stderr, "Cannot found pred for causative"
+                        print >>stderr, "Not found a pred for causative"
                 elif self.pword_causative_flag:
                     prev_pred = find_pred_key(self.result, search_range=(2,3))
                     if prev_pred:
                         self.result[prev_pred].set_tags({"alt": "causative/passive"})
                     else:
-                        print >>stderr, "Cannot found pred for causative/passive"
+                        print >>stderr, "Not found a pred for causative/passive"
                 self.pword_causative_flag = False
         else:
             # If previous word was pred, this is active
         # Judge whether the reference is "zero" or "dep" (or "sc" for same chunk)
 
         def get_offset_from_tags(id):
-            for k,same_offset_values in self.all_tags.iteritems():
-                for value in same_offset_values:
-                    if value[2].get('id', "") == id:
-                        return k
+            for line_n,line in self.all_tags.words().iteritems():
+                for pos_n, same_offset_tags in line.iteritems():
+                    for tag in same_offset_tags:
+                        if tag[2].get('id', "") == id:
+                            return (line_n, pos_n)
 
         if exo_flag:
             return "zero"
         if word_ln:
-            ln_offset = get_offset_from_tags(word_ln)
+            ln_offset = get_offset_from_tags(word_ln)  # pred or noun offset
             if not ln_offset:
                 print >>stderr, "Not found link id in self.all_tags"
                 return  # cannot judge
-            ln_offset = tuple(map(int, ln_offset.split('.')))  # pred offset
             my_offset = word_offset                            # ga,o,ni,.. offset
-            if ln_offset[0]-1 == my_offset[0]:  # If same line
+            if ln_offset[0]-1 == my_offset[0]:  # If on the same line  # FIXME: document this off-by-one
 
                 my_chunk = line_chunks.get_chunk_from_pos(my_offset[1], amb=True)
-                my_id, my_link_id = (int(my_chunk[1]), int(my_chunk[2][:-1]))
+                my_id, my_link_id = (int(my_chunk[1]), int(my_chunk[2][:-1]))  # id, link
                 if my_link_id == -1:  # no referent
                     return "zero"
                 my_link = line_chunks.get_chunk_from_id(my_link_id)  # link info
                     print >>stderr, "Not found link chunk in current line"
                     return  # cannot judge
                 ln_id, ln_link = (int(ln_chunk[1]), int(ln_chunk[2][:-1]))
-                if ln_id == my_link_id or my_id == ln_id or ln_link == my_id:
+                if my_id == ln_id:
+                    # same chunk
+                    return "sc"
+                elif ln_id == my_link_id or ln_link == my_id:
                     return "dep"
                 else:
                     return "zero"
             if match: return match[0]
             else: return None
 
-        # tag_and_links is dict,
-        # It is cannot convert to dict because it has same key in some case.
-        word_tag = [k for (k,v) in tag_and_link]
-        word_links = [k for (k,v) in tag_and_link]
+        # tag_and_link cannot be converted to a dict,
+        # because it may contain the same key more than once.
+        word_tags = [k for (k,v) in tag_and_link]
+        word_links = [v for (k,v) in tag_and_link]
         self.check_chunk_inserting(end_line, end_index)
 
         if end_line == 0:
         original_lemma = morph['lemma'] if morph.get('lemma') else word
         self.check_alt(original_lemma)
 
-        if not word_links and not word_tag:
+        if not word_links and not word_tags:
             key_0 = "%d.%d" % (end_line, end_index)
             key_1 = self.bccwj_word_id
             self.result[(key_0, key_1)] = \
             self.bccwj_word_id -= 1
             return
 
-        # Does not fix double word
+        # The double-word issue is left unfixed for now
         # if not exo_flag:
         #     previous = self.result.values()#[-1]#.get_word()
         #     previous_word = previous[-1] if previous else ""
                                 JoinedTag(word, "%s %s %s" % (lform, lemma, output_pos))
                     self.result[key].set_tags({"type": pos_type})
             elif tag in (u"ガ", u"ヲ", u"ニ", u"ハ"):
+                if end_line == 7 and end_index == 24:
+                    pass  # breakpoint anchor for debugging
                 gaoni = self.gaonimap(tag)
-                if self.judge_zero(exo_flag, (end_line, end_index), links['id'],
-                                   links.get('ln'), self.chunks[end_line]) == "zero":
-                    gaoni_type = "zero"
-                else:
-                    gaoni_type = "dep"
+                # try:
+                    # if self.judge_zero(exo_flag, (end_line, end_index), links['id'],
+                    #                    links.get('ln'), self.chunks[end_line]) == "zero":
+                    #     gaoni_type = "zero"
+                    # else:
+                    #     gaoni_type = "dep"
+                gaoni_type = self.judge_zero(exo_flag, (end_line, end_index), links['id'],
+                                             links.get('ln'), self.chunks[end_line])
+                # except TypeError, e:
+                #     print >>stderr, "Caught %s while judging whether this is a zero reference" % e
+                #     gaoni_type = "zero"
 
                 if search_value(self.result, links['id']) or exo_flag:
                     exo = self.exomap(end_index)
             if type(joined) == str_type:  # chunk
                 chunk = joined
                 if chunk.split(' ')[1] == "0":
-                    if not file_head:  # Does NOT append EOS in file head
+                    if not file_head:  # Do not append EOS at the head of the file
                         res.append("EOS")
                     file_head = False
                     res.append("# S-ID:%s_%s KNP:96/10/27 %s/%s/%s" % \
         res.append("EOS")  # file tail
         return res
 
+def restore_from_morphs(morphs):
+    # Debug helper: restore and print the text from morphs
+    import sys
+    write = sys.stdout.write
+    print "="*80
+    for line, lineMorphs in morphs.items():
+        print str(line)+';',
+        for index, morph in lineMorphs.items():
+            write(morph["word"])
+        print
+    print "="*80
+
+
 def output(tgr, tgr_id, cabocha_env):
     """
     Return a list of the same format as the NAIST Text Corpus.
     """
 
-    tags = tgr.get_tags()
-    print >>stderr, pp(tags.values())
+    tgr_interface = tgr.get_tags()
+    # print >>stderr, pp(dict(tags.items()))
     morphs = tgr.get_morph()
+    # restore_from_morphs(morphs)
     numbers = tgr.get_number()
 
     ## Join BCCWJ and tgr data
 
+    # def find_reference_word(tags, tgr_word, bccwj_offset):
+    #     # Match a tagged word of tgr with a word of BCCWJ.
+    #     # Without this function, a tagged word is simply assigned to the BCCWJ
+    #     # word that has the same end position; this function would instead
+    #     # return the closest matching word in BCCWJ, found heuristically.
+    #     # `tags`: all tags in tgr
+    #     # `tgr_word`: the string of the tgr word
+    #     # `bccwj_offset`: the tuple of a word offset in BCCWJ
+    #     # return: the closest matching word in BCCWJ
+    #     raise Exception("Implementation Error: the function is not implemented.")
+
+
     def convert_same_offset(same_offset_tags):
         if not same_offset_tags: return
-        word_tag = []
+        word_tags = []
         for tag in same_offset_tags:
-            # refer_tag such as ガ,オ,ニ
+            # refer_tag is a case marker such as ガ, ヲ, ニ
             (refer_tag, word_name, link, tag_range) = tag
             # word_tag[refer_tag] = link
-            word_tag.append((refer_tag, link))
-        return (word_name, word_tag, tag_range)
+            word_tags.append((refer_tag, link))
+        return (word_name, word_tags, tag_range)
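+    # For example (illustrative values):
+    #   convert_same_offset([[u'ガ', u'学校', {'id': 'newid0100'}, [3.0, 3.3]]])
+    #   returns (u'学校', [(u'ガ', {'id': 'newid0100'})], [3.0, 3.3])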
+
+    def judge_head(tag, morph, start_line, start_pos, end_line, end_pos):
+        # TODO: not implemented yet
+        pass
 
 
     referred_tag_number = -1
-    classification = classify(tags, morphs, tgr_id, use_cabocha=True, cabocha_env=cabocha_env)
+    classification = classify(tgr_interface, morphs, tgr_id, use_cabocha=True, cabocha_env=cabocha_env)
     for end_line, line_morphs in morphs.iteritems():
         if end_line == -1: continue  # exo will be processed after this
         for end_index, morph in line_morphs.iteritems():
-            tag_positions = tags.keys()
-            key = str(end_line+1) + '.' + str(end_index)
-            same_offset_tag = tags.get(key, [])
-            bccwj_word = morph['word']
-            if same_offset_tag:
-                # Tagged data matched end position in BCCWJ.
-                (word_name, tag, tag_range) \
-                    = convert_same_offset(same_offset_tag)
-                classification.apply(bccwj_word, morph,
+            bccwj_word_name = morph['word']
+
+            # Adjust this word's offset between tgr and BCCWJ
+            tgr_start_line = end_line + 1
+            tgr_end_line   = end_line + 1
+            tgr_start_pos  = end_index - len(bccwj_word_name)
+            tgr_end_pos    = end_index
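+            # e.g. with end_index 10 and a 3-character word, this looks up the
+            # tgr offsets (end_line+1, 7) .. (end_line+1, 10); tgr line
+            # numbers appear to be 1-origin, hence the +1.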
+
+            if bccwj_word_name == u"た":
+                pass  # breakpoint anchor for debugging
+
+            # The list of tags at the same offset
+            tags = tgr_interface.get_by_offset(tgr_start_line, tgr_start_pos,
+                                               tgr_end_line, tgr_end_pos)
+            if tags:
+                # Tagged data matched the end position of a word in BCCWJ.
+                (tgr_word_name, tag, tag_range) \
+                    = convert_same_offset(tags)
+
+                classification.apply(bccwj_word_name, morph,
                                      tag, end_line, end_index)
-                referred_tag_number = tag_positions.index(key)
             else:
-                # To find correct end position.
-                for r in range(1, len(bccwj_word)):
-                    key = str(end_line+1) + '.' + str(end_index-r)
-                    position = end_index-r
-                    same_offset_tag = tags.get(key, [])
-                    skip_flag = False
-                    if same_offset_tag:
-                        (word_name, tag, tag_range) \
-                            = convert_same_offset(same_offset_tag)
-                        if word_name in bccwj_word:
-                            print >>stderr, "Treat %s in BCCWJ as %s in tgr" % \
-                                (bccwj_word, word_name)
-                            classification.apply(bccwj_word, morph, tag,
-                                                 end_line, position)
-                            referred_tag_number = tag_positions.index(key)
-                            skip_flag = True
-                    if skip_flag:
-                        break
-                else:
-                    if len(tags)-1 != referred_tag_number:
-                        # If not refer last tag in current line
-                        n_key = tag_positions[referred_tag_number+1]
-                        n_tagged_morph = tags[n_key]
-                        n_word_name = n_tagged_morph[0][1]
-                    else:
-                        n_word_name = ""
+                # No matching word found in tgr.
+                classification.apply(bccwj_word_name, morph,
+                                     end_line=end_line, end_index=end_index)
 
-                    # if n_word_name and bccwj_word in n_word_name:
-                    #     print >>stderr, "Skipped %s in BCCWJ: %s in tgr" % \
-                    #         (bccwj_word.encode('utf-8'), n_word_name.encode('utf-8'))
-                    #     continue
-                    # else:
-                        # Cannot find same word in tgr.
-                    classification.apply(bccwj_word, morph,
-                                         end_line=end_line, end_index=end_index)
     # processing exo
-    for offset,same_offset_tags in tags.iteritems():
-        (line,position) = map(int, offset.split('.'))
-        if line != 0: continue
-        (word_name, word_tag, tag_range) \
-            = convert_same_offset(same_offset_tags)
-        for (k,v) in word_tag:  # word_tag is list (doesn't dict)
-            if v['id'] == "newid0130":
-                pass
-        classification.apply(word_name, {}, word_tag, line, position)
+    for line_n,line in tgr_interface.words().iteritems():  # line number, line
+        for pos, same_offset_tags in line.iteritems():   # position, same offset tags
+            # tag = same_offset_tags[0]
+            if line_n != 0: continue
+            (word_name, word_tag, tag_range) \
+                = convert_same_offset(same_offset_tags)
+            for (k,v) in word_tag:  # word_tag is a list (not a dict)
+                if v['id'] == "newid0130":
+                    pass  # breakpoint anchor for debugging
+            classification.apply(word_name, {}, word_tag, line_n, pos)
 
     return classification.final_result()
 
                       action='store_true', default=False)
     (opts, args) = parser.parse_args()
 
-    PARSING = "OC"
+    PARSING = ("OC", "PB")  # And probably it also can parse OC
 
     cabocha_env = setup_cabocha_config()
     if not opts.debug_flag:
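+        # Assumed layout, per the loop below: <tgr_dir>/<dir>/<CATEGORY>/*.tgr
+        # is converted and written to <out_dir>/<dir>/<CATEGORY>/*.ntc.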
-        for dir in [n for n in os.listdir(opts.tgr_dir)
-                    if os.path.isdir(os.path.join(opts.tgr_dir, n))]:
-            try:
-                os.makedirs("%s/%s/%s" % (opts.out_dir, dir, PARSING))
-            except: pass
-            for root, current_d, files in os.walk("%s/%s/%s/" % (opts.tgr_dir, dir, PARSING)):
-                for f in glob(os.path.join(root, '*.tgr')):
-                    buff = ""
-                    name = os.path.basename(f)
-                    name = re.sub('.tgr', '.ntc', name)
-                    tgrs = inputs(f, opts.bccwj_dir)
-                    for tgr in tgrs:
-                        tgr_id = re.sub("m_0", "", tgr.id)
-                        converted = output(tgr, tgr_id, cabocha_env)
-                        buff += '\n'.join(converted) + '\n'
-                    try:
-                        with open('%s/%s/%s/%s' % (opts.out_dir, dir, PARSING, name), 'w') as fp:
-                            fp.write(buff.encode('utf-8'))
-                    except IOError, e:
-                        print >>stderr, "Cannot open %s:%s" % (name, e)
-                        exit(-1)
-                    except UnicodeDecodeError, e:
-                        print >>stderr, "Cannot decode %s:%s" % (name, e)
-                        exit(-1)
+        for CATEGORY in PARSING:
+            for dir in [n for n in os.listdir(opts.tgr_dir)
+                        if os.path.isdir(os.path.join(opts.tgr_dir, n))]:
+                try:
+                    os.makedirs("%s/%s/%s" % (opts.out_dir, dir, CATEGORY))
+                except OSError: pass  # the directory already exists
+                for root, current_d, files in os.walk("%s/%s/%s/" % (opts.tgr_dir, dir, CATEGORY)):
+                    for f in glob(os.path.join(root, '*.tgr')):
+                        buff = ""
+                        name = os.path.basename(f)
+                        name = re.sub('.tgr', '.ntc', name)
+
+                        tgrs = inputs(f, opts.bccwj_dir)  # Obtain tags from the tgr file and parse the BCCWJ xml
+
+                        for tgr in tgrs:
+                            tgr_id = re.sub(r"m_[0-9]", "", tgr.id)
+                            converted = output(tgr, tgr_id, cabocha_env)
+                            buff += '\n'.join(converted) + '\n'
+                        try:
+                            with open('%s/%s/%s/%s' % (opts.out_dir, dir, CATEGORY, name), 'w') as fp:
+                                fp.write(buff.encode('utf-8'))
+                        except IOError, e:
+                            print >>stderr, "Cannot open %s:%s" % (name, e)
+                            exit(-1)
+                        except UnicodeDecodeError, e:
+                            print >>stderr, "Cannot decode %s:%s" % (name, e)
+                            exit(-1)
     else:
-        # Experiment on one file.
+        # Experiment on a single specified file.
         # (print output to stdout and doesn't write any file)
         buff = ""
-        tgrs = inputs("./input/bccwj-fixed-13.03.18-2/A/OC/000.tgr",
+        tgrs = inputs("input/restored/A/PB/000.tgr",
                       opts.bccwj_dir)
         for tgr in tgrs:
             tgr_id = re.sub("m_0", "", tgr.id)
-            converted = output(tgr, tgr_id)
+            converted = output(tgr, tgr_id, cabocha_env)
             # buff += "# S-ID:%s KNP:96/10/27 %s/%s/%s\n" % \
             #         (tgr.id, d.year, d.month, d.day)
             buff += '\n'.join(converted) + '\n'

File bin/restore_tgr.py

             else:
                 print "Unknow tag name on SUW position:", suw.tagName
 
-        # If it is the last element, add newlines.
+        # If it is the last element, add a newline.
         if luw.parentNode.lastChild == luw and \
            luw.parentNode.tagName != "quote" and \
             (luw.parentNode.nextSibling.nextSibling == None or

File bin/restore_tgr_OW.py

-#!/usr/bin/python
+#!/usr/bin/python
 #coding: utf-8
 
 import sys

File bin/restore_tgr_PB.py

 import re
 import os
 from glob import glob
+from copy import deepcopy
 from xml.dom import minidom
 from optparse import OptionParser
 
     else:
         return ""
 
+
+def tagged_char(element):
+    if element.tagName == 'sampling': pass
+    elif element.tagName == 'ruby':
+        return element.childNodes[0].data
+    elif element.tagName == 'correction':
+        return element.childNodes[0].data
+    return ""
+
+def processing_suw(suw, prevs, skip_next):
+    db_value = deepcopy(prevs)
+    if suw.tagName == 'fraction':
+        skip_next = False
+        for e,s in enumerate(suw.getElementsByTagName('SUW')):
+            if skip_next: skip_next = False; continue
+            if s.parentNode.tagName == 'NumTrans':
+                text = s.getAttribute('originalText')
+                if text == u'/':
+                    # reverse the numerator and the denominator
+                    nnode = suw.getElementsByTagName('SUW')[e+1]
+                    nnode_text = nnode.getAttribute('originalText') or \
+                                 nnode.childNodes[0].data
+                    db_value = db_value[:-1] + \
+                               nnode_text + u'/' + db_value[-1:]
+                    skip_next = True
+                else:
+                    db_value += text
+            else:
+                db_value += s.childNodes[0].data
+    elif suw.tagName == 'NumTrans':
+        db_value += suw.getAttribute('originalText')
+    elif suw.tagName == 'webBr':
+        db_value += '\n'
+    elif suw.tagName == 'SUW':
+        if len(suw.childNodes) > 1:
+            for elem_or_text in suw.childNodes:
+                try:
+                    db_value += tagged_char(elem_or_text)
+                except AttributeError:
+                    db_value += elem_or_text.data
+                except IndexError:
+                    pass
+        else:
+            try:
+                if suw.childNodes[0].tagName == 'enclosedCharacter':
+                    # Now go in <LUW ...><SUW ...>\
+                    # <enclosedCharacter description="some character"> here </...>
+                    db_value += suw.childNodes[0].childNodes[0].data
+                elif suw.childNodes[0].tagName == 'ruby':
+                    db_value += suw.childNodes[0].childNodes[0].data
+                elif suw.childNodes[0].tagName == 'correction':
+                    db_value += suw.childNodes[0].childNodes[0].data
+            except AttributeError:
+                db_value += suw.childNodes[0].data
+    elif suw.tagName == 'LUW':
+        # The following handles nested LUWs, which occur outside OC
+        nested_luw = suw
+        for suw in nested_luw.childNodes:
+            db_value = processing_suw(suw, db_value, False)
+    elif suw.tagName == 'noteMarker':
+        db_value += suw.getAttribute('text')
+    elif suw.tagName == 'sampling':
+        pass
+    else:
+        print "Unknown tag name on SUW position:", suw.tagName
+
+    return db_value
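+# processing_suw threads the accumulated text through its second argument;
+# a minimal sketch of the calling pattern (as used in extract_text below):
+#   text = ""
+#   for child in luw.childNodes:
+#       text = processing_suw(child, text, False)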
+
+
 def extract_text(luw):
     "Extracting text from the xml node 'LUW'"
 
-    db_value = ""
     nsib = luw.nextSibling
     article = luw.parentNode.parentNode.parentNode
+    skip_next = False
 
+    results = ""
     for suw in luw.childNodes:
-        if suw.tagName == 'fraction':
-            skip_next = False
-            for e,s in enumerate(suw.getElementsByTagName('SUW')):
-                if skip_next: skip_next = False; continue
-                if s.parentNode.tagName == 'NumTrans':
-                    text = s.getAttribute('originalText')
-                    if text == u'/':
-                        # reverse a numerator and a denominator
-                        nnode = suw.getElementsByTagName('SUW')[e+1]
-                        nnode_text = nnode.getAttribute('originalText') or \
-                                     nnode.childNodes[0].data
-                        db_value = db_value[:-1] + \
-                                   nnode_text + u'/' + db_value[-1:]
-                        skip_next = True
-                    else:
-                        db_value += text
-                else:
-                    db_value += s.childNodes[0].data
-        elif suw.tagName == 'NumTrans':
-            db_value += suw.getAttribute('originalText')
-        elif suw.tagName == 'webBr':
-            db_value += '\n'
-        elif suw.tagName == 'SUW':
-            if len(suw.childNodes) > 1:
-                for text in suw.childNodes:
-                    try:
-                        if text.tagName == 'sampling': pass
-                        elif text.tagName == 'ruby':
-                            db_value += text.childNodes[0].data
-                        elif text.tagName == 'correction':
-                            # db_value += text.childNodes[0].data
-                            # try:
-                            db_value += text.childNodes[0].data
-                            # except AttributeError:
-                            #     pass
-                    except AttributeError:
-                        db_value += text.data
-                    except IndexError:
-                        pass
-            else:
-                try:
-                    if suw.childNodes[0].tagName == 'enclosedCharacter':
-                        # Now go in <LUW ...><SUW ...>\
-                        # <enclosedCharacter description="Some character"> here </...>
-                        db_value += suw.childNodes[0].childNodes[0].data
-                    elif suw.childNodes[0].tagName == 'ruby':
-                        db_value += suw.childNodes[0].childNodes[0].data
-                except AttributeError:
-                    db_value += suw.childNodes[0].data
-        elif suw.tagName == 'LUW':
-            # Following is used in except OC
-            luw = suw
-            for suw in luw.childNodes:
-                if suw.tagName == 'SUW':
-                    for in_suw in suw.childNodes:
-                        try:
-                            if in_suw.tagName == 'correction':
-                                # Now go in <LUW ...><SUW ...>\
-                                # <correction originalText="..." type="..."> here </...>
-                                db_value += in_suw.getAttribute('originalText')
-                            elif in_suw.tagName == 'ruby':
-                                db_value += in_suw.childNodes[0].data
-                        except AttributeError:
-                            db_value += in_suw.data
 
-                if suw.tagName == 'NumTrans':
-                    db_value += suw.getAttribute('originalText')
-        elif suw.tagName == 'noteMarker':
-            db_value += suw.getAttribute('text')
-        elif suw.tagName == 'sampling':
-            pass
-        else:
-            print "Unknown tag name on SUW position:", suw.tagName
-
-    return db_value
+        results = processing_suw(suw, results, skip_next)
+    return results
 
 
 def restore_tgr(dom):
 
         if luw.parentNode.lastChild == luw and \
            luw.parentNode.tagName != "quote"  and \
-            (luw.parentNode.nextSibling.nextSibling == None \
+            (luw.parentNode.nextSibling.nextSibling == None
              or luw.parentNode.nextSibling.nextSibling.tagName != "LUW") and \
-            (dom.getAttribute('type') != 'fragment' \
+            (dom.getAttribute('type') != 'fragment'
              or dom.nextSibling.nextSibling == None):
             # TODO: I don't know why, but in some case,
-            # do not break after the fragment.
+            # a newline is not inserted after the fragment.
             db_value += "\n"
-        # if luw.parentNode.lastChild == luw
 
         if nsib == None: continue
     return db_value
     else:
         contents = xmldoc.getElementsByTagName("div")
     for each_ad in contents:
+        if each_ad.getAttribute('articleID').endswith('Answer'):
+            db_value += '\n'
         for sent in each_ad.getElementsByTagName('sentence'):
             if tgr_id.endswith("m_0") and sent.parentNode.tagName != "div":  # is the div branch unnecessary?
                 _sampling_flag = check_sampling(sent)

File input/dist/A/OC/003.tgr

-<text id=OC03_01038m_0>
-<attribute>
-checked	0
-</attribute>
-<lastid>
-133
-</lastid>
-<tags>
-np	id:0	[1.0, 1.11]	id=newid0100;
-np	id:1	[1.13, 1.24]	id=newid0101;
-np	id:2	[1.26, 1.36]	id=newid0102;
-np	id:3	[1.38, 1.45]	id=newid0103;
-述語	id:4	[3.0, 3.5]	id=1;
-np	id:5	[3.6, 3.10]	id=newid0104;
-ヲ	id:6	[3.6, 3.10]	ln=2;id=newid0105;
-np	id:7	[3.11, 3.13]	id=newid0106;
-ニ	id:8	[3.11, 3.13]	ln=2;id=newid0107;
-np	id:9	[3.14, 3.20]	id=newid0108;
-述語	id:10	[3.14, 3.20]	id=2;
-np	id:11	[3.22, 3.27]	id=newid0109;
-述語	id:12	[3.25, 3.27]	id=3;
-np	id:13	[5.0, 5.5]	id=newid0110;
-述語	id:14	[5.6, 5.9]	id=4;
-np	id:15	[5.12, 5.15]	id=newid0111;
-np	id:16	[5.19, 5.21]	id=newid0112;
-述語	id:17	[5.19, 5.21]	id=7;
-np	id:18	[5.30, 5.31]	id=newid0113;
-ニ	id:19	[5.30, 5.31]	ln=8;id=newid0114;
-述語	id:20	[5.32, 5.34]	id=8;
-述語	id:21	[5.36, 5.42]	id=9;
-述語	id:22	[5.49, 5.51]	id=11;
-ヲ	id:23	[3.6, 3.10]	id=newid0115;ln=1;
-ガ	id:24	[5.0, 5.3]	id=newid0117;ln=1;
-ガ	id:25	[5.0, 5.3]	id=newid0118;ln=2;
-ガ	id:26	[3.14, 3.21]	id=newid0119;ln=3;
-ニ	id:27	[3.22, 3.24]	id=newid0120;ln=3;
-ガ	id:28	[5.0, 5.3]	id=newid0121;ln=4;
-機能語相当	id:29	[5.10, 5.12]	id=newid0123;
-ガ	id:30	[3.14, 3.21]	id=newid0124;ln=7;
-ガ	id:31	[5.0, 5.3]	id=newid0125;ln=8;
-ヲ	id:32	[3.11, 3.13]	id=newid0126;ln=8;
-ガ	id:33	[5.0, 5.3]	id=newid0127;ln=9;
-ヲ	id:34	[3.11, 3.13]	id=newid0128;ln=9;
-機能語相当	id:35	[5.43, 5.45]	id=newid0129;
-ニ	id:36	[5.46, 5.48]	id=newid0130;ln=11;
-ガ	id:37	[3.14, 3.21]	id=newid0131;ln=11;
-ヲ	id:38	[3.11, 3.13]	id=newid0132;ln=4;
-</tags>
-</text>
-<text id=OC09_01086m_0>
-<attribute>
-checked	0
-</attribute>
-<lastid>
-170
-</lastid>
-<tags>
-np	id:0	[1.0, 1.11]	id=newid0100;
-np	id:1	[1.13, 1.24]	id=newid0101;
-np	id:2	[1.26, 1.36]	id=newid0102;
-np	id:3	[1.38, 1.45]	id=newid0103;
-np	id:4	[3.0, 3.1]	id=newid0104;
-np	id:5	[3.2, 3.4]	id=newid0105;
-np	id:6	[3.5, 3.8]	id=newid0106;
-np	id:7	[4.0, 4.2]	id=newid0108;
-np	id:8	[5.0, 5.3]	id=newid0109;
-np	id:9	[5.3, 5.5]	id=newid0110;
-np	id:10	[5.18, 5.20]	id=newid0111;
-np	id:11	[5.22, 5.23]	id=newid0112;
-np	id:12	[6.0, 6.3]	id=newid0113;
-np	id:13	[6.4, 6.6]	id=newid0114;
-np	id:14	[8.0, 8.3]	id=newid0116;
-np	id:15	[8.12, 8.14]	id=newid0118;
-np	id:16	[9.4, 9.5]	id=newid0120;
-np	id:17	[9.15, 9.19]	id=newid0121;
-述語	id:18	[3.9, 3.11]	id=newid0123;
-ニ	id:19	[3.5, 3.8]	id=newid0124;ln=newid0123;
-ガ	id:20	[1.0, 1.11]	id=newid0125;ln=newid0123;
-述語	id:21	[3.12, 3.21]	id=newid0126;
-ガ	id:22	[1.0, 1.11]	id=newid0127;ln=newid0126;
-述語	id:23	[4.0, 4.2]	id=newid0128;
-ガ	id:24	[1.0, 1.11]	id=newid0129;ln=newid0128;
-述語	id:25	[4.12, 4.15]	id=newid0130;
-ヲ	id:26	[4.8, 4.12]	id=newid0131;ln=newid0130;
-ガ	id:27	[1.0, 1.11]	id=newid0132;ln=newid0130;
-述語	id:28	[4.18, 4.20]	id=newid0133;
-ガ	id:29	[1.0, 1.11]	id=newid0134;ln=newid0133;
-述語	id:30	[5.3, 5.5]	id=newid0135;
-ガ	id:31	[1.0, 1.11]	id=newid0136;ln=newid0135;
-述語	id:32	[5.23, 5.28]	id=newid0137;
-ガ	id:33	[5.22, 5.23]	id=newid0138;ln=newid0137;
-述語	id:34	[6.0, 6.3]	id=newid0139;
-ガ	id:35	[6.4, 6.7]	id=newid0140;ln=newid0139;
-述語	id:36	[6.8, 6.10]	id=newid0141;
-ガ	id:37	[6.4, 6.7]	id=newid0142;ln=newid0141;
-述語	id:38	[8.8, 8.10]	id=newid0149;
-ヲ	id:39	[8.0, 8.3]	id=newid0150;ln=newid0149;
-ガ	id:40	[1.13, 1.24]	id=newid0151;ln=newid0149;
-述語	id:41	[8.15, 8.17]	id=newid0152;
-ガ	id:42	[1.13, 1.24]	id=newid0153;ln=newid0152;
-述語	id:43	[9.6, 9.9]	id=newid0154;
-ヲ	id:44	[9.4, 9.5]	id=newid0155;ln=newid0154;
-ガ	id:45	[1.13, 1.24]	id=newid0156;ln=newid0154;
-述語	id:46	[9.12, 9.14]	id=newid0157;
-ガ	id:47	[1.13, 1.24]	id=newid0158;ln=newid0157;
-述語	id:48	[9.20, 9.22]	id=newid0159;
-ニ	id:49	[9.15, 9.19]	id=newid0160;ln=newid0159;
-照応	id:50	[5.18, 5.20]	id=newid0162;
-照応	id:51	[5.0, 5.9]	id=newid0163;ln=newid0162;
-ガ	id:52	[9.12, 9.14]	id=newid0165;ln=newid0159;
-述語	id:53	[6.11, 6.18]	id=newid0166;
-ヲ	id:54	[6.4, 6.7]	id=newid0167;ln=newid0166;
-ガ	id:55	[1.13, 1.24]	id=newid0168;ln=newid0166;
-ニ	id:56	[1.0, 1.11]	id=newid0169;ln=newid0166;
-</tags>
-</text>
-<text id=OC09_01089m_0>
-<attribute>
-checked	0
-</attribute>
-<lastid>
-131
-</lastid>
-<tags>
-np	id:0	[1.0, 1.11]	id=newid0100;
-np	id:1	[1.13, 1.24]	id=newid0101;
-np	id:2	[1.26, 1.36]	id=newid0102;
-np	id:3	[1.38, 1.45]	id=newid0103;
-np	id:4	[3.0, 3.3]	id=newid0104;
-ガ	id:5	[3.0, 3.3]	ln=1;id=newid0105;
-np	id:6	[3.4, 3.6]	id=newid0106;
-述語	id:7	[3.6, 3.8]	id=1;
-np	id:8	[4.2, 4.5]	id=newid0107;
-述語	id:9	[4.2, 4.5]	id=2;
-np	id:10	[5.0, 5.1]	id=newid0108;
-事態	id:11	[5.2, 5.4]	id=10000;
-np	id:12	[5.2, 5.6]	id=newid0109;
-述語	id:13	[5.2, 5.4]	id=3;
-np	id:14	[7.0, 7.3]	id=newid0110;
-np	id:15	[7.4, 7.10]	id=newid0111;
-事態	id:16	[7.11, 7.13]	id=10001;
-np	id:17	[7.11, 7.13]	id=newid0112;
-述語	id:18	[7.14, 7.16]	id=4;
-np	id:19	[8.0, 8.6]	id=newid0113;
-np	id:20	[8.8, 8.11]	id=newid0114;
-ヲ	id:21	[8.8, 8.11]	ln=5;id=newid0115;
-述語	id:22	[8.12, 8.15]	id=5;
-np	id:23	[8.15, 8.17]	id=newid0116;
-np	id:24	[8.18, 8.20]	id=newid0118;
-np	id:25	[8.22, 8.24]	id=newid0119;
-述語	id:26	[8.22, 8.24]	id=6;
-ガ	id:27	[3.0, 3.3]	id=newid0120;ln=2;
-ガ	id:28	[4.2, 4.6]	id=newid0121;ln=3;
-ガ	id:29	[7.0, 7.3]	id=newid0122;ln=4;
-ガ	id:30	[1.13, 1.24]	id=newid0123;ln=5;
-ガ	id:31	[8.0, 8.6]	id=newid0124;ln=6;
-ガ	id:32	[1.0, 1.11]	id=newid0125;ln=10000;
-ガ	id:33	[7.4, 7.10]	id=newid0126;ln=10001;
-述語	id:34	[9.6, 9.8]	id=newid0127;
-ガ	id:35	[9.2, 9.5]	id=newid0128;ln=newid0127;
-述語	id:36	[9.20, 9.22]	id=newid0129;
-ガ	id:37	[9.6, 9.12]	id=newid0130;ln=newid0129;
-</tags>
-</text>
-<text id=OC12_00015m_0>
-<attribute>
-checked	0
-</attribute>
-<lastid>
-134
-</lastid>
-<tags>
-np	id:0	[1.0, 1.11]	id=newid0100;
-np	id:1	[1.13, 1.24]	id=newid0101;
-np	id:2	[1.26, 1.36]	id=newid0102;
-np	id:3	[1.38, 1.45]	id=newid0103;
-np	id:4	[3.3, 3.6]	id=newid0104;
-np	id:5	[3.7, 3.13]	id=newid0105;
-述語	id:6	[3.17, 3.19]	id=1;
-np	id:7	[6.0, 6.6]	id=newid0106;
-np	id:8	[6.6, 6.16]	id=newid0107;
-np	id:9	[7.0, 7.5]	id=newid0108;
-np	id:10	[8.0, 8.3]	id=newid0109;
-np	id:11	[9.0, 9.7]	id=newid0110;
-ヲ	id:12	[9.2, 9.7]	ln=2;id=newid0111;
-述語	id:13	[9.8, 9.12]	id=2;
-np	id:14	[9.12, 9.14]	id=newid0112;
-述語	id:15	[9.15, 9.17]	id=3;
-np	id:16	[9.17, 9.19]	id=newid0113;
-np	id:17	[10.0, 10.2]	id=newid0114;
-np	id:18	[11.0, 11.3]	id=newid0115;
-np	id:19	[11.4, 11.6]	id=newid0116;
-述語	id:20	[11.9, 11.11]	id=4;
-np	id:21	[12.0, 12.2]	id=newid0117;
-np	id:22	[12.3, 12.5]	id=newid0118;
-np	id:23	[12.6, 12.8]	id=newid0119;
-事態	id:24	[12.10, 12.12]	id=10000;
-np	id:25	[12.10, 12.12]	id=newid0120;
-述語	id:26	[12.13, 12.15]	id=5;
-ガ	id:27	[1.26, 1.36]	id=newid0121;ln=1;
-ガ	id:28	[1.26, 1.36]	id=newid0122;ln=2;
-ガ	id:29	[9.17, 9.19]	id=newid0123;ln=3;
-ガ	id:30	[11.1, 11.3]	id=newid0124;ln=4;
-ガ	id:31	[3.7, 3.13]	id=newid0126;ln=10000;
-ヲ	id:32	[12.6, 12.9]	id=newid0127;ln=10000;
-ガ	id:33	[3.7, 3.13]	id=newid0128;ln=5;
-照応	id:34	[9.0, 9.1]	id=newid0129;
-照応	id:35	[12.6, 12.9]	id=newid0130;ln=newid0129;
-述語	id:36	[4.11, 4.12]	id=newid0131;
-ガ	id:37	[4.1, 4.7]	id=newid0132;ln=newid0131;
-内容/結果物	id:38	[12.10, 12.12]	id=newid0133;
-</tags>
-</text>
-<text id=OC12_00016m_0>
-<attribute>
-checked	0
-</attribute>
-<lastid>
-165
-</lastid>
-<tags>
-np	id:0	[1.0, 1.11]	id=newid0100;
-np	id:1	[1.13, 1.24]	id=newid0101;
-np	id:2	[1.26, 1.36]	id=newid0102;
-np	id:3	[1.38, 1.45]	id=newid0103;
-np	id:4	[3.0, 3.2]	id=newid0104;
-np	id:5	[3.18, 3.20]	id=newid0106;
-np	id:6	[3.21, 3.23]	id=newid0107;
-np	id:7	[3.24, 3.25]	id=newid0109;
-np	id:8	[5.3, 5.6]	id=newid0111;
-np	id:9	[5.9, 5.15]	id=newid0112;
-np	id:10	[5.16, 5.20]	id=newid0113;
-np	id:11	[8.0, 8.2]	id=newid0114;
-np	id:12	[8.14, 8.16]	id=newid0117;
-np	id:13	[8.20, 8.22]	id=newid0118;
-np	id:14	[8.40, 8.42]	id=newid0119;
-np	id:15	[8.43, 8.45]	id=newid0120;
-述語	id:16	[3.3, 3.5]	id=newid0121;
-ヲ	id:17	[3.0, 3.2]	id=newid0122;ln=newid0121;
-ガ	id:18	[1.26, 1.36]	id=newid0123;ln=newid0121;
-述語	id:19	[3.10, 3.15]	id=newid0124;
-ガ	id:20	[1.26, 1.36]	id=newid0125;ln=newid0124;
-述語	id:21	[3.18, 3.20]	id=newid0126;
-ガ	id:22	[3.21, 3.23]	id=newid0127;ln=newid0126;
-述語	id:23	[3.26, 3.28]	id=newid0128;
-ヲ	id:24	[3.21, 3.23]	id=newid0129;ln=newid0128;
-ニ	id:25	[3.24, 3.25]	id=newid0130;ln=newid0128;
-ガ	id:26	[1.26, 1.36]	id=newid0131;ln=newid0128;
-能動化不可	id:27	[3.24, 3.28]	id=newid0132;
-助動詞	id:28	[3.28, 3.30]	id=newid0133;
-追加無し	id:29	[3.28, 3.30]	id=newid0134;ln=newid0133;
-述語	id:30	[8.3, 8.5]	id=newid0135;
-ニ	id:31	[8.0, 8.2]	id=newid0136;ln=newid0135;
-ガ	id:32	[1.26, 1.36]	id=newid0137;ln=newid0135;
-述語	id:33	[8.12, 8.13]	id=newid0138;
-ヲ	id:34	[8.6, 8.9]	id=newid0139;ln=newid0138;
-ニ	id:35	[8.10, 8.11]	id=newid0140;ln=newid0138;
-ガ	id:36	[1.26, 1.36]	id=newid0141;ln=newid0138;
-述語	id:37	[8.17, 8.19]	id=newid0142;
-ガ	id:38	[8.14, 8.16]	id=newid0143;ln=newid0142;
-述語	id:39	[8.20, 8.22]	id=newid0144;
-ガ	id:40	[1.0, 1.11]	id=newid0145;ln=newid0144;
-述語	id:41	[8.30, 8.32]	id=newid0146;
-ガ	id:42	[1.26, 1.36]	id=newid0147;ln=newid0146;
-ヲ	id:43	[3.21, 3.23]	id=newid0148;ln=newid0146;
-助動詞	id:44	[8.32, 8.34]	id=newid0149;
-追加無し	id:45	[8.32, 8.34]	id=newid0150;ln=newid0149;
-述語	id:46	[8.43, 8.45]	id=newid0151;
-ガ	id:47	[8.40, 8.42]	id=newid0152;ln=newid0151;
-事態	id:48	[5.18, 5.20]	id=newid0153;
-ヲ	id:49	[5.16, 5.18]	id=newid0154;ln=newid0153;
-ガ	id:50	[1.26, 1.36]	id=newid0155;ln=newid0153;
-述語	id:51	[6.0, 6.3]	id=newid0159;
-ガ	id:52	[1.26, 1.36]	id=newid0160;ln=newid0159;
-ニ	id:53	[5.16, 5.20]	id=newid0161;ln=newid0159;
-助動詞	id:54	[6.3, 6.5]	id=newid0162;
-追加無し	id:55	[6.3, 6.5]	id=newid0163;ln=newid0162;
-ニ	id:56	[1.26, 1.36]	id=newid0164;ln=newid0146;
-</tags>
-</text>
-<text id=OC12_00018m_0>
-<attribute>
-checked	0
-</attribute>
-<lastid>
-137
-</lastid>
-<tags>
-np	id:0	[1.0, 1.11]	id=newid0100;
-np	id:1	[1.13, 1.24]	id=newid0101;
-np	id:2	[1.26, 1.36]	id=newid0102;
-np	id:3	[1.38, 1.45]	id=newid0103;
-np	id:4	[3.0, 3.3]	id=newid0104;
-ヲ	id:5	[3.1, 3.3]	ln=1;id=newid0105;
-述語	id:6	[3.4, 3.6]	id=1;
-np	id:7	[3.10, 3.11]	id=newid0106;
-ヲ	id:8	[3.10, 3.11]	ln=2;id=newid0107;
-述語	id:9	[3.12, 3.14]	id=2;
-np	id:10	[3.14, 3.16]	id=newid0108;
-述語	id:11	[3.14, 3.16]	id=3;
-np	id:12	[5.0, 5.3]	id=newid0109;
-ヲ	id:13	[5.1, 5.3]	ln=4;id=newid0110;
-述語	id:14	[5.4, 5.6]	id=4;
-np	id:15	[5.10, 5.12]	id=newid0111;
-ヲ	id:16	[5.10, 5.12]	ln=5;id=newid0112;
-np	id:17	[5.13, 5.14]	id=newid0113;
-ニ	id:18	[5.13, 5.14]	ln=5;id=newid0114;
-np	id:19	[5.15, 5.18]	id=newid0115;
-述語	id:20	[5.15, 5.18]	id=5;
-述語	id:21	[5.21, 5.23]	id=6;
-述語	id:22	[5.24, 5.26]	id=7;
-np	id:23	[5.27, 5.29]	id=newid0117;
-ヲ	id:24	[5.27, 5.29]	ln=8;id=newid0118;
-述語	id:25	[5.30, 5.32]	id=8;
-np	id:26	[6.0, 6.1]	id=newid0119;
-ヲ	id:27	[6.0, 6.1]	ln=9;id=newid0120;
-述語	id:28	[6.2, 6.4]	id=9;
-np	id:29	[6.4, 6.6]	id=newid0121;
-np	id:30	[6.8, 6.10]	id=newid0122;
-ヲ	id:31	[6.8, 6.10]	ln=10;id=newid0123;
-述語	id:32	[6.11, 6.14]	id=10;
-述語	id:33	[6.15, 6.17]	id=11;
-ガ	id:34	[1.26, 1.36]	id=newid0124;ln=1;
-ガ	id:35	[1.26, 1.36]	id=newid0125;ln=2;
-ガ	id:36	[3.4, 3.6]	id=newid0126;ln=3;
-ガ	id:37	[1.26, 1.36]	id=newid0127;ln=4;
-ガ	id:38	[1.26, 1.36]	id=newid0128;ln=5;
-ガ	id:39	[1.26, 1.36]	id=newid0129;ln=6;
-ヲ	id:40	[5.10, 5.12]	id=newid0130;ln=6;
-ガ	id:41	[1.26, 1.36]	id=newid0131;ln=7;
-ヲ	id:42	[5.10, 5.12]	id=newid0132;ln=7;
-ガ	id:43	[1.26, 1.36]	id=newid0133;ln=8;
-ガ	id:44	[1.26, 1.36]	id=newid0134;ln=9;
-ガ	id:45	[1.26, 1.36]	id=newid0135;ln=10;
-ガ	id:46	[1.26, 1.36]	id=newid0136;ln=11;
-</tags>
-</text>
-<text id=OC12_00019m_0>
-<attribute>
-checked	0
-</attribute>
-<lastid>
-196
-</lastid>
-<tags>
-np	id:0	[1.0, 1.11]	id=newid0100;
-np	id:1	[1.13, 1.24]	id=newid0101;
-np	id:2	[1.26, 1.36]	id=newid0102;
-np	id:3	[1.38, 1.45]	id=newid0103;
-np	id:4	[3.0, 3.2]	id=newid0104;
-np	id:5	[3.3, 3.7]	id=newid0105;
-ヲ	id:6	[3.3, 3.7]	ln=1;id=newid0106;
-述語	id:7	[3.8, 3.10]	id=1;
-np	id:8	[4.0, 4.4]	id=newid0107;
-ヲ	id:9	[4.0, 4.4]	ln=2;id=newid0108;
-述語	id:10	[4.5, 4.8]	id=2;
-np	id:11	[4.9, 4.10]	id=newid0109;
-np	id:12	[4.11, 4.13]	id=newid0110;
-np	id:13	[5.0, 5.2]	id=newid0112;
-np	id:14	[5.4, 5.8]	id=newid0113;
-述語	id:15	[5.4, 5.6]	id=4;
-np	id:16	[6.0, 6.2]	id=newid0114;
-np	id:17	[6.7, 6.9]	id=newid0115;
-述語	id:18	[6.10, 6.12]	id=5;
-述語	id:19	[6.16, 6.19]	id=7;
-述語	id:20	[7.0, 7.2]	id=8;
-np	id:21	[7.5, 7.6]	id=newid0116;
-np	id:22	[9.0, 9.9]	id=newid0117;
-np	id:23	[9.10, 9.14]	id=newid0118;
-ガ	id:24	[9.10, 9.14]	ln=10;id=newid0119;
-np	id:25	[9.15, 9.17]	id=newid0120;
-述語	id:26	[9.15, 9.17]	id=10;
-np	id:27	[10.0, 10.2]	id=newid0121;
-np	id:28	[10.12, 10.14]	id=newid0122;
-np	id:29	[10.15, 10.19]	id=newid0124;
-np	id:30	[10.20, 10.22]	id=newid0125;
-np	id:31	[12.0, 12.1]	id=newid0126;
-ニ	id:32	[12.0, 12.1]	ln=13;id=newid0127;
-np	id:33	[12.2, 12.4]	id=newid0128;
-述語	id:34	[12.2, 12.4]	id=13;
-述語	id:35	[12.10, 12.14]	id=14;
-np	id:36	[12.18, 12.20]	id=newid0129;
-述語	id:37	[12.18, 12.20]	id=16;
-np	id:38	[13.4, 13.13]	id=newid0131;
-np	id:39	[13.14, 13.18]	id=newid0132;
-np	id:40	[13.19, 13.23]	id=newid0133;
-np	id:41	[13.24, 13.28]	id=newid0134;
-ガ	id:42	[1.0, 1.11]	id=newid0135;ln=1;
-述語	id:43	[4.17, 4.19]	id=newid0137;
-ガ	id:44	[4.11, 4.13]	id=newid0138;ln=newid0137;
-ガ	id:45	[5.0, 5.2]	id=newid0139;ln=4;
-述語	id:46	[6.3, 6.5]	id=newid0140;
-ガ	id:47	[6.0, 6.2]	id=newid0141;ln=newid0140;
-ニ	id:48	[1.0, 1.11]	id=newid0142;ln=newid0140;
-助動詞	id:49	[6.5, 6.6]	id=newid0143;
-追加無し	id:50	[6.5, 6.6]	id=newid0144;ln=newid0143;
-ガ	id:51	[1.0, 1.11]	id=newid0145;ln=5;
-ガ	id:52	[1.0, 1.11]	id=newid0146;ln=7;
-ガ	id:53	[7.5, 7.6]	id=newid0147;ln=8;
-述語	id:54	[10.0, 10.5]	id=newid0154;
-ヲ	id:55	[4.0, 4.4]	id=newid0155;ln=newid0154;
-ガ	id:56	[1.26, 1.36]	id=newid0156;ln=newid0154;
-述語	id:57	[10.15, 10.17]	id=newid0157;
-ガ	id:58	[10.12, 10.14]	id=newid0158;ln=newid0157;
-ガ	id:59	[10.20, 10.22]	id=newid0159;ln=13;
-ガ	id:60	[1.26, 1.36]	id=newid0161;ln=14;
-内容/結果物	id:61	[10.12, 10.14]	id=newid0165;
-ガ	id:62	[1.26, 1.36]	id=newid0168;ln=2;
-モノ	id:63	[9.12, 9.14]	id=newid0170;
-モノ	id:64	[10.20, 10.22]	id=newid0171;
-モノ	id:65	[13.16, 13.18]	id=newid0172;
-ヲ	id:66	[5.4, 5.12]	id=newid0173;ln=8;
-述語	id:67	[11.0, 11.4]	id=newid0174;
-ガ	id:68	[10.20, 10.22]	id=newid0175;ln=newid0174;
-述語	id:69	[11.11, 11.14]	id=newid0176;
-ガ	id:70	[11.5, 11.7]	id=newid0177;ln=newid0176;
-ニ	id:71	[11.8, 11.10]	id=newid0178;ln=newid0176;
-述語	id:72	[11.18, 11.20]	id=newid0179;
-ガ	id:73	[11.8, 11.10]	id=newid0180;ln=newid0179;
-ヲ	id:74	[4.0, 4.4]	id=newid0181;ln=14;
-ガ	id:75	[11.8, 11.10]	id=newid0182;ln=16;
-述語	id:76	[14.3, 14.6]	id=newid0183;
-ガ	id:77	[13.24, 13.28]	id=newid0184;ln=newid0183;
-ヲ	id:78	[13.14, 13.18]	id=newid0185;ln=newid0183;
-助動詞	id:79	[14.6, 14.7]	id=newid0186;
-追加無し	id:80	[14.6, 14.7]	id=newid0187;ln=newid0186;
-照応	id:81	[4.14, 4.26]	id=newid0188;
-照応	id:82	[5.0, 5.2]	id=newid0189;ln=newid0188;
-照応	id:83	[10.20, 10.22]	id=newid0190;
-照応	id:84	[11.5, 11.7]	id=newid0191;ln=newid0190;
-述語	id:85	[7.7, 7.14]	id=newid0192;
-ガ	id:86	[7.5, 7.6]	id=newid0193;ln=newid0192;
-ニ	id:87	[1.0, 1.11]	id=newid0194;ln=newid0192;
-ヲ	id:88	[1.26, 1.36]	id=newid0195;ln=newid0192;
-</tags>
-</text>
-<text id=OC12_00763m_0>
-<attribute>
-checked	0
-</attribute>
-<lastid>
-213
-</lastid>
-<tags>
-np	id:0	[1.0, 1.11]	id=newid0100;
-np	id:1	[1.13, 1.24]	id=newid0101;
-np	id:2	[1.26, 1.36]	id=newid0102;
-np	id:3	[1.38, 1.45]	id=newid0103;
-np	id:4	[3.0, 3.3]	id=newid0104;
-np	id:5	[3.4, 3.6]	id=newid0105;
-np	id:6	[4.0, 4.7]	id=newid0106;
-np	id:7	[7.0, 7.2]	id=newid0107;
-np	id:8	[7.3, 7.5]	id=newid0108;
-np	id:9	[7.6, 7.9]	id=newid0109;
-述語	id:10	[7.6, 7.9]	id=2;
-np	id:11	[9.0, 9.2]	id=newid0110;
-np	id:12	[10.0, 10.2]	id=newid0111;
-np	id:13	[10.7, 10.9]	id=newid0113;
-np	id:14	[10.16, 10.19]	id=newid0114;
-np	id:15	[11.0, 11.4]	id=newid0115;
-述語	id:16	[11.8, 11.11]	id=5;
-np	id:17	[11.16, 11.21]	id=newid0116;
-述語	id:18	[11.16, 11.21]	id=6;
-np	id:19	[11.22, 11.24]	id=newid0117;
-np	id:20	[12.0, 12.3]	id=newid0118;
-np	id:21	[12.4, 12.6]	id=newid0119;
-事態	id:22	[12.7, 12.9]	id=10001;
-np	id:23	[12.7, 12.9]	id=newid0120;
-ヲ	id:24	[12.7, 12.9]	ln=8;id=newid0121;
-np	id:25	[12.10, 12.12]	id=newid0122;
-述語	id:26	[12.13, 12.15]	id=8;
-np	id:27	[12.15, 12.17]	id=newid0123;
-述語	id:28	[13.1, 13.3]	id=9;
-np	id:29	[13.7, 13.9]	id=newid0124;
-np	id:30	[13.10, 13.13]	id=newid0125;
-np	id:31	[13.16, 13.18]	id=newid0126;
-ニ	id:32	[13.16, 13.18]	ln=10;id=newid0127;
-np	id:33	[13.19, 13.21]	id=newid0128;
-述語	id:34	[13.19, 13.21]	id=10;
-事態	id:35	[14.4, 14.6]	id=10002;
-np	id:36	[14.4, 14.6]	id=newid0129;
-np	id:37	[14.12, 14.14]	id=newid0130;
-事態	id:38	[14.17, 14.19]	id=10003;
-np	id:39	[14.17, 14.21]	id=newid0132;
-ガ	id:40	[14.17, 14.21]	ln=11;id=newid0133;
-np	id:41	[14.22, 14.24]	id=newid0134;
-ヲ	id:42	[14.22, 14.24]	ln=11;id=newid0135;
-述語	id:43	[14.25, 14.28]	id=11;
-np	id:44	[14.28, 14.30]	id=newid0136;
-述語	id:45	[14.31, 14.33]	id=12;
-np	id:46	[15.4, 15.8]	id=newid0137;
-np	id:47	[16.2, 16.8]	id=newid0138;
-述語	id:48	[3.7, 3.9]	id=newid0139;
-ヲ	id:49	[3.4, 3.6]	id=newid0140;ln=newid0139;
-ガ	id:50	[1.26, 1.36]	id=newid0141;ln=newid0139;
-助動詞	id:51	[3.9, 3.11]	id=newid0142;
-追加無し	id:52	[3.9, 3.11]	id=newid0143;ln=newid0142;
-ヲ	id:53	[7.3, 7.5]	id=newid0144;ln=2;
-ガ	id:54	[4.0, 4.7]	id=newid0145;ln=2;
-述語	id:55	[7.10, 7.12]	id=newid0146;
-ガ	id:56	[4.0, 4.7]	id=newid0147;ln=newid0146;
-ヲ	id:57	[7.3, 7.5]	id=newid0148;ln=newid0146;
-機能語相当	id:58	[7.13, 7.15]	id=newid0149;
-述語	id:59	[9.3, 9.5]	id=newid0150;
-ヲ	id:60	[3.4, 3.6]	id=newid0151;ln=newid0150;
-ガ	id:61	[1.26, 1.36]	id=newid0152;ln=newid0150;
-助動詞	id:62	[9.5, 9.7]	id=newid0153;
-追加無し	id:63	[9.5, 9.7]	id=newid0154;ln=newid0153;
-機能語相当	id:64	[10.3, 10.6]	id=newid0155;
-述語	id:65	[10.10, 10.13]	id=newid0156;
-ガ	id:66	[1.26, 1.36]	id=newid0157;ln=newid0156;
-ヲ	id:67	[1.26, 1.36]	id=newid0158;ln=newid0156;
-ガ	id:68	[11.22, 11.24]	id=newid0159;ln=5;
-ガ	id:69	[11.22, 11.24]	id=newid0160;ln=6;
-ガ	id:70	[1.26, 1.36]	id=newid0161;ln=8;
-ガ	id:71	[10.16, 10.19]	id=newid0162;ln=9;
-ガ	id:72	[1.26, 1.36]	id=newid0163;ln=10;
-述語	id:73	[14.7, 14.9]	id=newid0164;
-ヲ	id:74	[14.4, 14.6]	id=newid0165;ln=newid0164;
-助動詞	id:75	[14.9, 14.10]	id=newid0167;
-追加無し	id:76	[14.9, 14.10]	id=newid0168;ln=newid0167;
-機能語相当	id:77	[14.15, 14.17]	id=newid0169;
-ガ	id:78	[14.28, 14.30]	id=newid0170;ln=12;
-述語	id:79	[15.9, 15.12]	id=newid0171;
-ヲ	id:80	[15.4, 15.8]	id=newid0172;ln=newid0171;
-ガ	id:81	[1.26, 1.36]	id=newid0173;ln=newid0171;
-助動詞	id:82	[15.12, 15.13]	id=newid0174;
-追加無し	id:83	[15.12, 15.13]	id=newid0175;ln=newid0174;
-述語	id:84	[16.3, 16.8]	id=newid0176;
-ガ	id:85	[4.0, 4.7]	id=newid0177;ln=newid0176;
-ガ	id:86	[12.4, 12.6]	id=newid0178;ln=10001;
-ガ	id:87	[1.26, 1.36]	id=newid0179;ln=10002;
-ヲ	id:88	[1.26, 1.36]	id=newid0180;ln=10002;
-ニ	id:89	[1.26, 1.36]	id=newid0181;ln=10002;
-ガ	id:90	[14.20, 14.21]	id=newid0182;ln=10003;
-ヲ	id:91	[1.26, 1.36]	id=newid0183;ln=10003;
-ニ	id:92	[1.26, 1.36]	id=newid0184;ln=10003;
-助動詞	id:93	[10.13, 10.14]	id=newid0189;
-追加無し	id:94	[10.13, 10.14]	id=newid0190;ln=newid0189;
-ガ	id:95	[1.26, 1.36]	id=newid0193;ln=newid0164;
-ニ	id:96	[1.26, 1.36]	id=newid0194;ln=newid0171;
-内容/結果物	id:97	[15.4, 15.6]	id=newid0195;
-述語	id:98	[5.10, 5.15]	id=newid0196;
-ニ	id:99	[5.3, 5.9]	id=newid0197;ln=newid0196;
-ガ	id:100	[4.0, 4.7]	id=newid0199;ln=newid0196;
-述語	id:101	[6.11, 6.13]	id=newid0200;
-ニ	id:102	[6.8, 6.10]	id=newid0201;ln=newid0200;
-ガ	id:103	[4.0, 4.7]	id=newid0202;ln=newid0200;
-機能語相当	id:104	[6.14, 6.16]	id=newid0203;
-述語	id:105	[6.19, 6.21]	id=newid0204;
-ヲ	id:106	[6.16, 6.18]	id=newid0205;ln=newid0204;
-ガ	id:107	[1.0, 1.11]	id=newid0206;ln=newid0204;
-事態	id:108	[5.5, 5.7]	id=newid0207;
-ヲ	id:109	[5.3, 5.5]	id=newid0208;ln=newid0207;
-ガ	id:110	[1.26, 1.36]	id=newid0209;ln=newid0207;
-事態	id:111	[5.7, 5.9]	id=newid0210;
-ガ	id:112	[1.26, 1.36]	id=newid0211;ln=newid0210;
-ヲ	id:113	[1.26, 1.36]	id=newid0212;ln=10003;
-</tags>
-</text>
-<text id=OC12_00764m_0>
-<attribute>
-checked	0
-</attribute>
-<lastid>
-212
-</lastid>
-<tags>
-np	id:0	[1.0, 1.11]	id=newid0100;
-np	id:1	[1.13, 1.24]	id=newid0101;
-np	id:2	[1.26, 1.36]	id=newid0102;
-np	id:3	[1.38, 1.45]	id=newid0103;
-np	id:4	[3.0, 3.2]	id=newid0104;
-np	id:5	[3.3, 3.4]	id=newid0105;
-np	id:6	[3.6, 3.8]	id=newid0106;
-述語	id:7	[3.9, 3.11]	id=1;
-np	id:8	[3.12, 3.14]	id=newid0108;
-ヲ	id:9	[3.12, 3.14]	ln=2;id=newid0109;
-np	id:10	[3.15, 3.18]	id=newid0110;
-述語	id:11	[3.15, 3.18]	id=2;
-np	id:12	[4.0, 4.2]	id=newid0111;
-np	id:13	[4.2, 4.4]	id=newid0112;
-述語	id:14	[4.2, 4.4]	id=4;
-np	id:15	[5.0, 5.9]	id=newid0113;
-np	id:16	[5.10, 5.12]	id=newid0114;
-ヲ	id:17	[5.11, 5.12]	ln=5;id=newid0115;
-述語	id:18	[5.13, 5.15]	id=5;
-np	id:19	[5.16, 5.18]	id=newid0116;
-np	id:20	[5.19, 5.20]	id=newid0117;
-np	id:21	[5.21, 5.23]	id=newid0118;
-np	id:22	[5.24, 5.25]	id=newid0119;
-述語	id:23	[5.26, 5.28]	id=6;
-np	id:24	[6.0, 6.5]	id=newid0120;
-np	id:25	[6.7, 6.9]	id=newid0122;
-np	id:26	[6.10, 6.15]	id=newid0123;
-np	id:27	[6.16, 6.18]	id=newid0124;
-述語	id:28	[6.18, 6.20]	id=7;
-np	id:29	[6.20, 6.21]	id=newid0125;
-np	id:30	[6.22, 6.24]	id=newid0126;
-np	id:31	[6.28, 6.30]	id=newid0127;
-np	id:32	[8.4, 8.7]	id=newid0128;
-np	id:33	[10.4, 10.5]	id=newid0129;
-np	id:34	[10.10, 10.14]	id=newid0130;
-np	id:35	[10.16, 10.19]	id=newid0131;
-np	id:36	[10.20, 10.21]	id=newid0132;
-np	id:37	[10.22, 10.24]	id=newid0134;
-np	id:38	[10.27, 10.28]	id=newid0136;
-np	id:39	[10.29, 10.33]	id=newid0137;
-np	id:40	[10.34, 10.36]	id=newid0138;
-np	id:41	[10.36, 10.39]	id=newid0139;
-np	id:42	[12.0, 12.2]	id=newid0140;
-np	id:43	[12.3, 12.5]	id=newid0141;
-np	id:44	[12.6, 12.8]	id=newid0142;
-np	id:45	[12.11, 12.13]	id=newid0143;
-np	id:46	[12.15, 12.16]	id=newid0144;
-np	id:47	[12.23, 12.25]	id=newid0146;
-np	id:48	[12.26, 12.27]	id=newid0147;
-np	id:49	[12.28, 12.32]	id=newid0148;
-np	id:50	[12.33, 12.36]	id=newid0149;
-np	id:51	[12.37, 12.43]	id=newid0150;
-np	id:52	[13.3, 13.5]	id=newid0151;
-ヲ	id:53	[13.3, 13.5]	ln=16;id=newid0152;
-述語	id:54	[13.6, 13.9]	id=16;
-事態	id:55	[14.0, 14.2]	id=10001;
-np	id:56	[14.0, 14.2]	id=newid0153;
-np	id:57	[15.0, 15.3]	id=newid0154;
-np	id:58	[15.9, 15.11]	id=newid0155;
-ガ	id:59	[1.26, 1.36]	id=newid0156;ln=1;
-ガ	id:60	[1.13, 1.24]	id=newid0157;ln=2;
-ガ	id:61	[1.0, 1.11]	id=newid0163;ln=4;
-ヲ	id:62	[6.23, 6.30]	id=newid0164;ln=4;
-ガ	id:63	[1.26, 1.36]	id=newid0165;ln=5;
-ガ	id:64	[5.24, 5.25]	id=newid0166;ln=6;
-述語	id:65	[6.11, 6.14]	id=newid0167;
-ガ	id:66	[6.7, 6.9]	id=newid0168;ln=newid0167;
-ガ	id:67	[6.17, 6.18]	id=newid0169;ln=7;
-ガ	id:68	[1.0, 1.11]	id=newid0174;ln=16;
-述語	id:69	[15.9, 15.11]	id=newid0175;
-ニ	id:70	[15.0, 15.3]	id=newid0176;ln=newid0175;
-ガ	id:71	[1.26, 1.36]	id=newid0177;ln=newid0175;
-ヲ	id:72	[15.5, 15.9]	id=newid0178;ln=newid0175;
-助動詞	id:73	[15.11, 15.13]	id=newid0179;
-追加無し	id:74	[15.11, 15.13]	id=newid0180;ln=newid0179;
-ガ	id:75	[1.0, 1.11]	id=newid0183;ln=10001;
-述語	id:76	[5.4, 5.6]	id=newid0185;
-ガ	id:77	[1.26, 1.36]	id=newid0186;ln=newid0185;
-ヲ	id:78	[5.1, 5.4]	id=newid0187;ln=newid0185;
-述語	id:79	[6.25, 6.27]	id=newid0188;
-ガ	id:80	[6.20, 6.21]	id=newid0189;ln=newid0188;
-述語	id:81	[6.28, 6.30]	id=newid0190;
-述語	id:82	[12.18, 12.20]	id=newid0192;
-ニ	id:83	[12.15, 12.16]	id=newid0193;ln=newid0192;
-ガ	id:84	[12.17, 12.18]	id=newid0194;ln=newid0192;
-述語	id:85	[10.25, 10.27]	id=newid0196;
-ニ	id:86	[10.22, 10.24]	id=newid0198;ln=newid0196;
-事態	id:87	[3.16, 3.18]	id=newid0199;
-ヲ	id:88	[3.12, 3.14]	id=newid0200;ln=newid0199;
-ガ	id:89	[1.13, 1.24]	id=newid0201;ln=newid0199;
-述語	id:90	[8.4, 8.10]	id=newid0202;
-ガ	id:91	[1.0, 1.11]	id=newid0203;ln=newid0202;
-ニ	id:92	[1.13, 1.24]	id=newid0204;ln=newid0202;
-ヲ	id:93	[3.6, 3.8]	id=newid0205;ln=1;
-ガ	id:94	[6.20, 6.21]	id=newid0206;ln=newid0190;
-ガ	id:95	[10.10, 10.14]	id=newid0207;ln=newid0196;
-述語	id:96	[3.22, 3.29]	id=newid0208;
-ガ	id:97	[1.13, 1.24]	id=newid0209;ln=newid0208;
-ニ	id:98	[1.0, 1.11]	id=newid0210;ln=newid0208;
-ヲ	id:99	[3.12, 3.14]	id=newid0211;ln=newid0208;
-</tags>
-</text>
-<text id=OC12_00765m_0>
-<attribute>
-checked	0
-</attribute>
-<lastid>
-133
-</lastid>
-<tags>
-np	id:0	[1.0, 1.11]	id=newid0100;
-np	id:1	[1.13, 1.24]	id=newid0101;
-np	id:2	[1.26, 1.36]	id=newid0102;
-np	id:3	[1.38, 1.45]	id=newid0103;
-np	id:4	[3.0, 3.2]	id=newid0104;
-np	id:5	[3.3, 3.12]	id=newid0105;
-np	id:6	[3.14, 3.15]	id=newid0106;
-np	id:7	[5.0, 5.5]	id=newid0107;
-np	id:8	[6.0, 6.2]	id=newid0108;
-述語	id:9	[6.0, 6.2]	id=1;
-np	id:10	[6.3, 6.6]	id=newid0109;
-述語	id:11	[6.3, 6.6]	id=2;
-np	id:12	[6.7, 6.9]	id=newid0110;
-np	id:13	[6.10, 6.12]	id=newid0111;
-ガ	id:14	[6.10, 6.12]	ln=3;id=newid0112;
-np	id:15	[6.13, 6.15]	id=newid0113;
-述語	id:16	[6.13, 6.15]	id=3;
-np	id:17	[7.0, 7.5]	id=newid0114;
-np	id:18	[7.6, 7.7]	id=newid0115;
-np	id:19	[7.9, 7.12]	id=newid0116;
-ヲ	id:20	[7.10, 7.12]	ln=4;id=newid0117;
-述語	id:21	[7.13, 7.15]	id=4;
-述語	id:22	[7.16, 7.18]	id=5;
-np	id:23	[7.20, 7.26]	id=newid0118;
-ヲ	id:24	[7.21, 7.26]	ln=6;id=newid0119;
-np	id:25	[7.27, 7.29]	id=newid0120;
-述語	id:26	[7.27, 7.29]	id=6;
-np	id:27	[7.32, 7.34]	id=newid0121;
-ガ	id:28	[7.32, 7.34]	ln=7;id=newid0122;
-述語	id:29	[7.35, 7.37]	id=7;
-述語	id:30	[3.16, 3.19]	id=newid0124;
-ガ	id:31	[3.3, 3.12]	id=newid0125;ln=newid0124;
-ガ	id:32	[6.10, 6.12]	id=newid0126;ln=1;
-ガ	id:33	[1.26, 1.36]	id=newid0128;ln=4;
-ガ	id:34	[1.26, 1.36]	id=newid0129;ln=5;
-ガ	id:35	[1.26, 1.36]	id=newid0130;ln=6;
-ガ	id:36	[6.10, 6.12]	id=newid0132;ln=2;
-</tags>
-</text>
+<text id=OC03_01038m_0>
+<attribute>
+checked	0
+</attribute>
+<lastid>
+133
+</lastid>
+<tags>
+np	id:0	[1.0, 1.11]	id=newid0100;
+np	id:1	[1.13, 1.24]	id=newid0101;
+np	id:2	[1.26, 1.36]	id=newid0102;
+np	id:3	[1.38, 1.45]	id=newid0103;
+述語	id:4	[3.0, 3.5]	id=1;
+np	id:5	[3.6, 3.10]	id=newid0104;
+ヲ	id:6	[3.6, 3.10]	ln=2;id=newid0105;
+np	id:7	[3.11, 3.13]	id=newid0106;
+ニ	id:8	[3.11, 3.13]	ln=2;id=newid0107;
+np	id:9	[3.14, 3.20]	id=newid0108;
+述語	id:10	[3.14, 3.20]	id=2;
+np	id:11	[3.22, 3.27]	id=newid0109;
+述語	id:12	[3.25, 3.27]	id=3;
+np	id:13	[5.0, 5.5]	id=newid0110;
+述語	id:14	[5.6, 5.9]	id=4;
+np	id:15	[5.12, 5.15]	id=newid0111;
+np	id:16	[5.19, 5.21]	id=newid0112;
+述語	id:17	[5.19, 5.21]	id=7;
+np	id:18	[5.30, 5.31]	id=newid0113;
+ニ	id:19	[5.30, 5.31]	ln=8;id=newid0114;
+述語	id:20	[5.32, 5.34]	id=8;
+述語	id:21	[5.36, 5.42]	id=9;
+述語	id:22	[5.49, 5.51]	id=11;
+ヲ	id:23	[3.6, 3.10]	id=newid0115;ln=1;
+ガ	id:24	[5.0, 5.3]	id=newid0117;ln=1;
+ガ	id:25	[5.0, 5.3]	id=newid0118;ln=2;
+ガ	id:26	[3.14, 3.21]	id=newid0119;ln=3;
+ニ	id:27	[3.22, 3.24]	id=newid0120;ln=3;
+ガ	id:28	[5.0, 5.3]	id=newid0121;ln=4;
+機能語相当	id:29	[5.10, 5.12]	id=newid0123;
+ガ	id:30	[3.14, 3.21]	id=newid0124;ln=7;
+ガ	id:31	[5.0, 5.3]	id=newid0125;ln=8;
+ヲ	id:32	[3.11, 3.13]	id=newid0126;ln=8;
+ガ	id:33	[5.0, 5.3]	id=newid0127;ln=9;
+ヲ	id:34	[3.11, 3.13]	id=newid0128;ln=9;
+機能語相当	id:35	[5.43, 5.45]	id=newid0129;
+ニ	id:36	[5.46, 5.48]	id=newid0130;ln=11;
+ガ	id:37	[3.14, 3.21]	id=newid0131;ln=11;
+ヲ	id:38	[3.11, 3.13]	id=newid0132;ln=4;
+</tags>
+</text>
+<text id=OC09_01086m_0>
+<attribute>
+checked	0
+</attribute>
+<lastid>
+170
+</lastid>
+<tags>
+np	id:0	[1.0, 1.11]	id=newid0100;
+np	id:1	[1.13, 1.24]	id=newid0101;
+np	id:2	[1.26, 1.36]	id=newid0102;
+np	id:3	[1.38, 1.45]	id=newid0103;
+np	id:4	[3.0, 3.1]	id=newid0104;
+np	id:5	[3.2, 3.4]	id=newid0105;
+np	id:6	[3.5, 3.8]	id=newid0106;
+np	id:7	[4.0, 4.2]	id=newid0108;
+np	id:8	[5.0, 5.3]	id=newid0109;
+np	id:9	[5.3, 5.5]	id=newid0110;
+np	id:10	[5.18, 5.20]	id=newid0111;
+np	id:11	[5.22, 5.23]	id=newid0112;
+np	id:12	[6.0, 6.3]	id=newid0113;
+np	id:13	[6.4, 6.6]	id=newid0114;
+np	id:14	[8.0, 8.3]	id=newid0116;
+np	id:15	[8.12, 8.14]	id=newid0118;
+np	id:16	[9.4, 9.5]	id=newid0120;
+np	id:17	[9.15, 9.19]	id=newid0121;
+述語	id:18	[3.9, 3.11]	id=newid0123;
+ニ	id:19	[3.5, 3.8]	id=newid0124;ln=newid0123;
+ガ	id:20	[1.0, 1.11]	id=newid0125;ln=newid0123;
+述語	id:21	[3.12, 3.21]	id=newid0126;
+ガ	id:22	[1.0, 1.11]	id=newid0127;ln=newid0126;
+述語	id:23	[4.0, 4.2]	id=newid0128;
+ガ	id:24	[1.0, 1.11]	id=newid0129;ln=newid0128;
+述語	id:25	[4.12, 4.15]	id=newid0130;
+ヲ	id:26	[4.8, 4.12]	id=newid0131;ln=newid0130;
+ガ	id:27	[1.0, 1.11]	id=newid0132;ln=newid0130;
+述語	id:28	[4.18, 4.20]	id=newid0133;
+ガ	id:29	[1.0, 1.11]	id=newid0134;ln=newid0133;
+述語	id:30	[5.3, 5.5]	id=newid0135;
+ガ	id:31	[1.0, 1.11]	id=newid0136;ln=newid0135;
+述語	id:32	[5.23, 5.28]	id=newid0137;
+ガ	id:33	[5.22, 5.23]	id=newid0138;ln=newid0137;
+述語	id:34	[6.0, 6.3]	id=newid0139;
+ガ	id:35	[6.4, 6.7]	id=newid0140;ln=newid0139;
+述語	id:36	[6.8, 6.10]	id=newid0141;