# Copyright: 2011 MoinMoin:RonnyPfannschmidt
# Copyright: 2011 MoinMoin:ThomasWaldmann
# Copyright: 2011 MoinMoin:MichaelMayorov
# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.

"""
MoinMoin - indexing middleware

The backends and stores moin uses are rather simple: mostly just an
unsorted / unordered bunch of revisions (meta and data) with iteration.

The indexing middleware adds the needed power: once all metadata and data
are indexed, we can do all sorts of operations on the indexer level:
* searching
* lookup by name, uuid, ...
* selecting
* listing

Using Whoosh (a fast pure-Python indexing and search library), we build,
maintain and use 2 indexes:

* "all revisions" index (big, needed for history search)
* "latest revisions" index (smaller, just the current revisions)

When creating or destroying revisions, indexes are automatically updated.

There is also code to do a full index rebuild in case the index gets damaged,
lost or needs rebuilding for other reasons, as well as index update code to
do a quick "intelligent" update of a "mostly ok" index, which just adds,
updates and deletes whatever differs between the backend and the current index.

Indexing is the only layer that can easily deal with **names** (it can
easily translate names to UUIDs and vice versa) and with **items** (it
knows the current revision and can easily list and order historical
revisions), using the index.

The layers below use UUIDs to identify revision metadata and data:

* revid (metaid) - a UUID identifying a specific revision (revision metadata)
* dataid - a UUID identifying some specific revision data (optional), it is
  just stored into revision metadata.
* itemid - a UUID identifying an item (== a set of revisions), it is just
  stored into revision metadata. itemid is only easily usable on indexing
  level.

Many methods provided by the indexing middleware will be fast, because they
do not access the layers below (like the backend), but just the index files;
usually it is even just the small and thus fast latest-revs index.
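
Example usage (a sketch, not a prescribed API flow; ``backend`` and the
index directory path are placeholders provided by the calling code)::

    index_storage = WHOOSH_FILESTORAGE, ('/path/to/index', ), {}
    imw = IndexingMiddleware(index_storage, backend, wiki_name=u'mywiki',
                             acl_rights_contents=[u'read', u'write', u'admin'])
    imw.create()   # create empty ALL_REVS / LATEST_REVS indexes
    imw.rebuild()  # index all revisions found in the backend
    imw.open()     # open the indexes for searching / updating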
"""


from __future__ import absolute_import, division

import os
import shutil
import datetime

from MoinMoin import log
logging = log.getLogger(__name__)

from flask import request
from flask import g as flaskg
from flask import current_app as app

from whoosh.fields import Schema, TEXT, ID, IDLIST, NUMERIC, DATETIME, KEYWORD, BOOLEAN
from whoosh.writing import AsyncWriter
from whoosh.qparser import QueryParser, MultifieldParser, RegexPlugin, PseudoFieldPlugin
from whoosh.qparser import WordNode
from whoosh.query import Every, Term
from whoosh.sorting import FieldFacet

from MoinMoin.constants.keys import (WIKINAME, NAMESPACE, NAME, NAME_EXACT, MTIME, CONTENTTYPE, TAGS, LANGUAGE,
                                     USERID, ADDRESS, HOSTNAME, SIZE, ACTION, COMMENT, SUMMARY, CONTENT,
                                     EXTERNALLINKS, ITEMLINKS, ITEMTRANSCLUSIONS, ACL, EMAIL, OPENID,
                                     ITEMID, REVID, CURRENT, PARENTID, PTIME, LATEST_REVS, ALL_REVS, BACKENDNAME)
from MoinMoin.constants.contenttypes import CONTENTTYPE_USER
from MoinMoin.constants.namespaces import NAMESPACE_DEFAULT
from MoinMoin.constants import keys
from MoinMoin.constants.keys import ITEMTYPE

from MoinMoin import user
from MoinMoin.search.analyzers import item_name_analyzer, MimeTokenizer, AclTokenizer
from MoinMoin.themes import utctimestamp
from MoinMoin.storage.middleware.validation import ContentMetaSchema, UserMetaSchema
from MoinMoin.storage.error import NoSuchItemError, ItemAlreadyExistsError


WHOOSH_FILESTORAGE = 'FileStorage'
INDEXES = [LATEST_REVS, ALL_REVS, ]


def get_names(meta):
    """
    Get the (list of) names from metadata and work around the misc. bad values
    that can show up there (while not all code is fixed to store them correctly).

    TODO make sure meta[NAME] is always a list of unicode

    :param meta: a metadata dictionary that might have a NAME key
    :return: list of names
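
    Example (a sketch of the workaround behaviour)::

        get_names({NAME: [u'Foo']})  # -> [u'Foo']
        get_names({NAME: u'Foo'})    # -> [u'Foo'], logs a workaround warning
        get_names({})                # -> [u'DoesNotExist'], never an empty list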
    """
    msg = "NAME is not a list but %r - fix this! Workaround enabled."
    names = meta.get(NAME)
    if names is None:
        logging.warning(msg % names)
        names = []
    elif isinstance(names, str):
        logging.warning(msg % names)
        names = [names.decode('utf-8'), ]
    elif isinstance(names, unicode):
        logging.warning(msg % names)
        names = [names, ]
    elif isinstance(names, tuple):
        logging.warning(msg % names)
        names = list(names)
    elif not isinstance(names, list):
        raise TypeError("NAME is not a list but %r - fix this!" % names)
    if not names:
        # we currently never return an empty list, some code
        # might not be able to deal with it:
        names = [u'DoesNotExist', ]
    return names


def backend_to_index(meta, content, schema, wikiname, backend_name):
    """
    Convert backend metadata/data to a whoosh document.

    :param meta: revision meta from moin backend
    :param content: revision data converted to indexable content
    :param schema: whoosh schema
    :param wikiname: interwikiname of this wiki
    :param backend_name: name of the backend the revision is stored in
    :returns: document to put into whoosh index
    """
    doc = dict([(str(key), value)
                for key, value in meta.items()
                if key in schema])
    for key in [MTIME, PTIME]:
        if key in doc:
            # we have UNIX UTC timestamp (int), whoosh wants datetime
            doc[key] = datetime.datetime.utcfromtimestamp(doc[key])
    doc[NAME_EXACT] = doc[NAME]
    doc[WIKINAME] = wikiname
    doc[CONTENT] = content
    doc[BACKENDNAME] = backend_name
    return doc


from MoinMoin.util.mime import Type, type_moin_document
from MoinMoin.util.tree import moin_page
from MoinMoin.converter import default_registry
from MoinMoin.util.iri import Iri


def convert_to_indexable(meta, data, item_name=None, is_new=False):
    """
    Convert revision data to indexable content.

    :param meta: revision metadata (gets updated as a side effect)
    :param data: revision data (file-like)
                 please make sure that the data file is positioned so that all
                 indexable content can be read from it. if you have just
                 written that content or already read from it, you need to
                 call data.seek(0) before calling convert_to_indexable().
    :param item_name: item name to use (optional, default: first name from meta)
    :param is_new: whether this is a new, uncommitted revision, in which case we
                   modify the metadata as a side effect (storing extracted link
                   information into it)
    :returns: indexable content, text/plain, unicode object
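
    Example (a sketch; StringIO stands in for the revision data file)::

        from StringIO import StringIO
        meta = {CONTENTTYPE: u'text/x.moin.wiki;charset=utf-8'}
        text = convert_to_indexable(meta, StringIO('= Heading ='), item_name=u'Foo')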
    """
    class PseudoRev(object):
        def __init__(self, meta, data):
            self.meta = meta
            self.data = data
            self.revid = meta.get(REVID)

            class PseudoItem(object):
                def __init__(self, name):
                    self.name = name
            self.item = PseudoItem(item_name)

        def read(self, *args, **kw):
            return self.data.read(*args, **kw)

        def seek(self, *args, **kw):
            return self.data.seek(*args, **kw)

        def tell(self, *args, **kw):
            return self.data.tell(*args, **kw)

    if not item_name:
        item_name = get_names(meta)[0]

    rev = PseudoRev(meta, data)
    try:
        # TODO use different converter mode?
        # Maybe we want some special mode for the input converters so they emit
        # different output than for normal rendering), esp. for the non-markup
        # content types (images, etc.).
        input_contenttype = meta[CONTENTTYPE]
        output_contenttype = 'text/plain'
        type_input_contenttype = Type(input_contenttype)
        type_output_contenttype = Type(output_contenttype)
        reg = default_registry
        # first try a direct conversion (this could be useful for extraction
        # of (meta)data from binary types, like from images or audio):
        conv = reg.get(type_input_contenttype, type_output_contenttype)
        if conv:
            doc = conv(rev, input_contenttype)
            return doc
        # otherwise try via DOM as intermediate format (this is useful if
        # input type is markup, to get rid of the markup):
        input_conv = reg.get(type_input_contenttype, type_moin_document)
        refs_conv = reg.get(type_moin_document, type_moin_document, items='refs')
        output_conv = reg.get(type_moin_document, type_output_contenttype)
        if input_conv and output_conv:
            doc = input_conv(rev, input_contenttype)
            # We do not convert smileys, includes, macros, links, because
            # that does not improve search results or even makes them worse.
            # We do run the refs converter, though, to extract links and
            # transclusions.
            if is_new:
                # we can only modify new, uncommitted revisions, not stored revs
                i = Iri(scheme='wiki', authority='', path='/' + item_name)
                doc.set(moin_page.page_href, unicode(i))
                refs_conv(doc)
                # side effect: we update some metadata:
                meta[ITEMLINKS] = refs_conv.get_links()
                meta[ITEMTRANSCLUSIONS] = refs_conv.get_transclusions()
                meta[EXTERNALLINKS] = refs_conv.get_external_links()
            doc = output_conv(doc)
            return doc
        # no way
        raise TypeError("No converter for {0} --> {1}".format(input_contenttype, output_contenttype))
    except Exception as e:  # catch all exceptions, we don't want to break an indexing run
        logging.exception("Exception happened in conversion of item {0!r} rev {1} contenttype {2}:".format(
                          item_name, meta.get(REVID, 'new'), meta.get(CONTENTTYPE, '')))
        doc = u'ERROR [{0!s}]'.format(e)
        return doc


class IndexingMiddleware(object):
    def __init__(self, index_storage, backend, wiki_name=None, acl_rights_contents=[], **kw):
        """
        Store params, create schemas.
        """
        self.index_storage = index_storage
        self.backend = backend
        self.wikiname = wiki_name
        self.ix = {}  # open indexes
        self.schemas = {}  # existing schemas

        common_fields = {
            # wikiname so we can have a shared index in a wiki farm, always check this!
            WIKINAME: ID(stored=True),
            # namespace, so we can have different namespaces within a wiki, always check this!
            NAMESPACE: ID(stored=True),
            # tokenized NAME from metadata - use this for manual searching from UI
            NAME: TEXT(stored=True, multitoken_query="and", analyzer=item_name_analyzer(), field_boost=2.0),
            # unmodified NAME from metadata - use this for precise lookup by the code.
            # also needed for wildcard search, so the original string as well as the query
            # (with the wildcard) is not cut into pieces.
            NAME_EXACT: ID(field_boost=3.0),
            # revision id (aka meta id)
            REVID: ID(unique=True, stored=True),
            # parent revision id
            PARENTID: ID(stored=True),
            # backend name (which backend is this rev stored in?)
            BACKENDNAME: ID(stored=True),
            # MTIME from revision metadata (converted to UTC datetime)
            MTIME: DATETIME(stored=True),
            # publish time from metadata (converted to UTC datetime)
            PTIME: DATETIME(stored=True),
            # ITEMTYPE from metadata, always matched exactly hence ID
            ITEMTYPE: ID(stored=True),
            # tokenized CONTENTTYPE from metadata
            CONTENTTYPE: TEXT(stored=True, multitoken_query="and", analyzer=MimeTokenizer()),
            # unmodified list of TAGS from metadata
            TAGS: ID(stored=True),
            LANGUAGE: ID(stored=True),
            # USERID from metadata
            USERID: ID(stored=True),
            # ADDRESS from metadata
            ADDRESS: ID(stored=True),
            # HOSTNAME from metadata
            HOSTNAME: ID(stored=True),
            # SIZE from metadata
            SIZE: NUMERIC(stored=True),
            # ACTION from metadata
            ACTION: ID(stored=True),
            # tokenized COMMENT from metadata
            COMMENT: TEXT(stored=True),
            # SUMMARY from metadata
            SUMMARY: TEXT(stored=True),
            # data (content), converted to text/plain and tokenized
            CONTENT: TEXT(stored=True),
        }

        latest_revs_fields = {
            # ITEMID from metadata - as there is only latest rev of same item here, it is unique
            ITEMID: ID(unique=True, stored=True),
            # unmodified list of ITEMLINKS from metadata
            ITEMLINKS: ID(stored=True),
            # unmodified list of ITEMTRANSCLUSIONS from metadata
            ITEMTRANSCLUSIONS: ID(stored=True),
            # tokenized ACL from metadata
            ACL: TEXT(analyzer=AclTokenizer(acl_rights_contents), multitoken_query="and", stored=True),
        }
        latest_revs_fields.update(**common_fields)

        userprofile_fields = {
            # Note: email / openid (if given) should be unique, but we might
            # have lots of empty values if it is not given and thus it is NOT
            # unique overall! Wrongly declaring it unique would lead to whoosh
            # killing other users from index when update_document() is called!
            EMAIL: ID(stored=True),
            OPENID: ID(stored=True),
        }
        latest_revs_fields.update(**userprofile_fields)

        # XXX This is a highly ad-hoc way to support indexing of ticket items.
        ticket_fields = {
            'effort': NUMERIC(stored=True),
            'difficulty': NUMERIC(stored=True),
            'severity': NUMERIC(stored=True),
            'priority': NUMERIC(stored=True),
            'assigned_to': ID(stored=True),
            'superseded_by': ID(stored=True),
            'depends_on': ID(stored=True),
            'closed': BOOLEAN(stored=True),
        }
        latest_revs_fields.update(**ticket_fields)

        blog_entry_fields = {
        }
        latest_revs_fields.update(**blog_entry_fields)

        all_revs_fields = {
            ITEMID: ID(stored=True),
        }
        all_revs_fields.update(**common_fields)

        latest_revisions_schema = Schema(**latest_revs_fields)
        all_revisions_schema = Schema(**all_revs_fields)

        # Define dynamic fields
        dynamic_fields = [("*_id", ID(stored=True)),
                          ("*_text", TEXT(stored=True)),
                          ("*_keyword", KEYWORD(stored=True)),
                          ("*_numeric", NUMERIC(stored=True)),
                          ("*_datetime", DATETIME(stored=True)),
                          ("*_boolean", BOOLEAN(stored=True)),
                         ]

        # Adding dynamic fields to schemas
        for glob, field_type in dynamic_fields:
            latest_revisions_schema.add(glob, field_type, glob=True)
            all_revisions_schema.add(glob, field_type, glob=True)

        # schemas are needed by query parser and for index creation
        self.schemas[ALL_REVS] = all_revisions_schema
        self.schemas[LATEST_REVS] = latest_revisions_schema

        # what fields could whoosh result documents have (no matter whether all revs index
        # or latest revs index):
        self.common_fields = set(latest_revs_fields.keys()) & set(all_revs_fields.keys())

    def get_storage_params(self, tmp=False):
        kind, params, kw = self.index_storage
        params, kw = list(params), dict(kw)  # better make a (mutable) copy
        if kind == WHOOSH_FILESTORAGE:
            # index_storage = 'FileStorage', (index_dir, ), {}
            if tmp:
                params[0] += '.temp'
            from whoosh.filedb.filestore import FileStorage
            cls = FileStorage
        else:
            raise ValueError("index_storage = {0!r} is not supported!".format(kind))
        return kind, cls, params, kw

    def get_storage(self, tmp=False, create=False):
        """
        Get the whoosh storage (whoosh supports different kinds of storage,
        e.g. to filesystem or to GAE).
        Currently we only support the FileStorage.
        """
        kind, cls, params, kw = self.get_storage_params(tmp)
        if kind == WHOOSH_FILESTORAGE:
            if create:
                index_dir = params[0]
                try:
                    os.mkdir(index_dir)
                except OSError:
                    # ignore exception, we'll get another exception below
                    # in case there are problems with the index_dir
                    pass
        return cls(*params, **kw)

    def open(self):
        """
        Open all indexes.
        """
        storage = self.get_storage()
        for name in INDEXES:
            self.ix[name] = storage.open_index(name)

    def close(self):
        """
        Close all indexes.
        """
        for name in self.ix:
            self.ix[name].close()
        self.ix = {}

    def create(self, tmp=False):
        """
        Create all indexes (empty).
        """
        storage = self.get_storage(tmp, create=True)
        for name in INDEXES:
            storage.create_index(self.schemas[name], indexname=name)

    def destroy(self, tmp=False):
        """
        Destroy all indexes.
        """
        # XXX this is whoosh backend specific and currently only works for FileStorage.
        kind, cls, params, kw = self.get_storage_params(tmp)
        if kind == WHOOSH_FILESTORAGE:
            index_dir = params[0]
            if os.path.exists(index_dir):
                shutil.rmtree(index_dir)

    def move_index(self):
        """
        Move freshly built indexes from tmp storage to normal storage
        """
        # XXX this is whoosh backend specific and currently only works for FileStorage.
        kind, cls, params, kw = self.get_storage_params(False)
        if kind == WHOOSH_FILESTORAGE:
            _, _, params_tmp, _ = self.get_storage_params(True)
            self.destroy()
            index_dir, index_dir_tmp = params[0], params_tmp[0]
            os.rename(index_dir_tmp, index_dir)

    def index_revision(self, meta, content, backend_name, async=False):
        """
        Index a single revision, add it to all-revs and latest-revs index.

        :param meta: metadata dict
        :param content: preprocessed (filtered) indexable content
        :param async: if True, use the AsyncWriter, otherwise use normal writer
        """
        doc = backend_to_index(meta, content, self.schemas[ALL_REVS], self.wikiname, backend_name)
        if async:
            writer = AsyncWriter(self.ix[ALL_REVS])
        else:
            writer = self.ix[ALL_REVS].writer()
        with writer as writer:
            writer.update_document(**doc)  # update, because store_revision() may give us an existing revid
        doc = backend_to_index(meta, content, self.schemas[LATEST_REVS], self.wikiname, backend_name)
        if async:
            writer = AsyncWriter(self.ix[LATEST_REVS])
        else:
            writer = self.ix[LATEST_REVS].writer()
        with writer as writer:
            writer.update_document(**doc)

    def remove_revision(self, revid, async=True):
        """
        Remove a single revision from indexes.
        """
        if async:
            writer = AsyncWriter(self.ix[ALL_REVS])
        else:
            writer = self.ix[ALL_REVS].writer()
        with writer as writer:
            writer.delete_by_term(REVID, revid)
        if async:
            writer = AsyncWriter(self.ix[LATEST_REVS])
        else:
            writer = self.ix[LATEST_REVS].writer()
        with writer as writer:
            # find out itemid related to the revid we want to remove:
            with self.ix[LATEST_REVS].searcher() as searcher:
                docnum_remove = searcher.document_number(revid=revid)
                if docnum_remove is not None:
                    itemid = searcher.stored_fields(docnum_remove)[ITEMID]
            if docnum_remove is not None:
                # we are removing a revid that is in latest revs index
                latest_backends_revids = self._find_latest_backends_revids(self.ix[ALL_REVS], Term(ITEMID, itemid))
                if latest_backends_revids:
                    # we have a latest revision, just update the document in the index:
                    assert len(latest_backends_revids) == 1  # this item must have only one latest revision
                    latest_backend_revid = latest_backends_revids[0]
                    # we must fetch from backend because schema for LATEST_REVS is different than for ALL_REVS
                    # (and we can't be sure we have all fields stored, too)
                    meta, _ = self.backend.retrieve(*latest_backend_revid)
                    # we only use meta (not data), because we do not want to transform data->content again (this
                    # is potentially expensive) as we already have the transformed content stored in ALL_REVS index:
                    with self.ix[ALL_REVS].searcher() as searcher:
                        doc = searcher.document(revid=latest_backend_revid[1])
                        content = doc[CONTENT]
                    doc = backend_to_index(meta, content, self.schemas[LATEST_REVS], self.wikiname,
                                           backend_name=latest_backend_revid[0])
                    writer.update_document(**doc)
                else:
                    # there is no revision left in this item that could be the new "latest rev", just kill the rev
                    writer.delete_document(docnum_remove)

    def _modify_index(self, index, schema, wikiname, revids, mode='add', procs=1, limitmb=256):
        """
        modify index contents - add, update, delete the indexed documents for all given revids

        Note: mode == 'add' is faster but you need to make sure to not create duplicate
              documents in the index.
        """
        with index.writer(procs=procs, limitmb=limitmb) as writer:
            for backend_name, revid in revids:
                if mode in ['add', 'update', ]:
                    meta, data = self.backend.retrieve(backend_name, revid)
                    content = convert_to_indexable(meta, data, is_new=False)
                    doc = backend_to_index(meta, content, schema, wikiname, backend_name)
                if mode == 'update':
                    writer.update_document(**doc)
                elif mode == 'add':
                    writer.add_document(**doc)
                elif mode == 'delete':
                    writer.delete_by_term(REVID, revid)
                else:
                    raise ValueError("mode must be 'update', 'add' or 'delete', not '{0}'".format(mode))

    def _find_latest_backends_revids(self, index, query=None):
        """
        find the latest revision identifiers using the all-revs index

        :param index: an up-to-date and open ALL_REVS index
        :param query: query to search only specific revisions (optional, default: all items/revisions)
        :returns: a list of tuples (backend name, latest revid)
        """
        if query is None:
            query = Every()
        with index.searcher() as searcher:
            result = searcher.search(query, groupedby=ITEMID, sortedby=FieldFacet(MTIME, reverse=True))
            by_item = result.groups(ITEMID)
            # values in v list are in same relative order as in results, so latest MTIME is first:
            latest_backends_revids = [(searcher.stored_fields(v[0])[BACKENDNAME],
                                      searcher.stored_fields(v[0])[REVID])
                                      for v in by_item.values()]
        return latest_backends_revids

    def rebuild(self, tmp=False, procs=1, limitmb=256):
        """
        Add all items/revisions from the backends of this wiki to the index
        (which is expected to have no items/revisions from this wiki yet).

        Note: index might be shared by multiple wikis, so it is:
              create, rebuild wiki1, rebuild wiki2, ...
              create (tmp), rebuild wiki1, rebuild wiki2, ..., move
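
        A full offline rebuild at the tmp location might look like this
        (a sketch; ``imw`` is an IndexingMiddleware instance)::

            imw.create(tmp=True)
            imw.rebuild(tmp=True)
            imw.move_index()
            imw.update()  # catch revisions created while the rebuild was running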
        """
        storage = self.get_storage(tmp)
        index = storage.open_index(ALL_REVS)
        try:
            # build an index of all we have (so we know what we have)
            all_revids = self.backend  # the backend is an iterator over all revids
            self._modify_index(index, self.schemas[ALL_REVS], self.wikiname, all_revids, 'add', procs, limitmb)
            latest_backends_revids = self._find_latest_backends_revids(index)
        finally:
            index.close()
        # now build the index of the latest revisions:
        index = storage.open_index(LATEST_REVS)
        try:
            self._modify_index(index, self.schemas[LATEST_REVS], self.wikiname, latest_backends_revids, 'add',
                               procs, limitmb)
        finally:
            index.close()

    def update(self, tmp=False):
        """
        Make sure index reflects current backend state, add missing stuff, remove outdated stuff.

        This is intended to be used:
        * after a full rebuild that was done at tmp location
        * after wiki is made read-only or taken offline
        * after the index was moved to the normal index location

        Reason: new revisions that were created after the rebuild started might be missing from the new index.

        :returns: index changed (bool)
        """
        storage = self.get_storage(tmp)
        index_all = storage.open_index(ALL_REVS)
        try:
            # NOTE: self.backend iterator gives (backend_name, revid) tuples, which is NOT
            # the same as (name, revid), thus we do the set operations just on the revids.
            # first update ALL_REVS index:
            revids_backends = dict((revid, backend_name) for backend_name, revid in self.backend)
            backend_revids = set(revids_backends)
            with index_all.searcher() as searcher:
                ix_revids_backends = dict((doc[REVID], doc[BACKENDNAME]) for doc in searcher.all_stored_fields())
            revids_backends.update(ix_revids_backends)  # this is needed for stuff that was deleted from storage
            ix_revids = set(ix_revids_backends)
            add_revids = backend_revids - ix_revids
            del_revids = ix_revids - backend_revids
            changed = add_revids or del_revids
            add_revids = [(revids_backends[revid], revid) for revid in add_revids]
            del_revids = [(revids_backends[revid], revid) for revid in del_revids]
            self._modify_index(index_all, self.schemas[ALL_REVS], self.wikiname, add_revids, 'add')
            self._modify_index(index_all, self.schemas[ALL_REVS], self.wikiname, del_revids, 'delete')

            backend_latest_backends_revids = set(self._find_latest_backends_revids(index_all))
        finally:
            index_all.close()
        index_latest = storage.open_index(LATEST_REVS)
        try:
            # now update LATEST_REVS index:
            with index_latest.searcher() as searcher:
                ix_revids = set(doc[REVID] for doc in searcher.all_stored_fields())
            backend_latest_revids = set(revid for name, revid in backend_latest_backends_revids)
            upd_revids = backend_latest_revids - ix_revids
            upd_revids = [(revids_backends[revid], revid) for revid in upd_revids]
            self._modify_index(index_latest, self.schemas[LATEST_REVS], self.wikiname, upd_revids, 'update')
            self._modify_index(index_latest, self.schemas[LATEST_REVS], self.wikiname, del_revids, 'delete')
        finally:
            index_latest.close()
        return changed

    def optimize_backend(self):
        """
        Optimize backend / collect garbage to save space:

        * deleted items: destroy them? use a deleted_max_age?
        * user profiles: only keep latest revision?
        * normal wiki items: keep by max_revisions_count / max_age
        * deduplicate data (determine dataids with same hash, fix references to point to one of them)
        * remove unreferenced dataids (destroyed revisions, deduplicated stuff)
        """
        # TODO

    def optimize_index(self, tmp=False):
        """
        Optimize whoosh index.
        """
        storage = self.get_storage(tmp)
        for name in INDEXES:
            ix = storage.open_index(name)
            try:
                ix.optimize()
            finally:
                ix.close()

    def dump(self, tmp=False, idx_name=LATEST_REVS):
        """
        Yield key/value tuple lists for all documents in the given index, fields sorted.
        """
        storage = self.get_storage(tmp)
        ix = storage.open_index(idx_name)
        try:
            with ix.searcher() as searcher:
                for doc in searcher.all_stored_fields():
                    name = doc.pop(NAME, u"")
                    content = doc.pop(CONTENT, u"")
                    yield [(NAME, name), ] + sorted(doc.items()) + [(CONTENT, content), ]
        finally:
            ix.close()

    def query_parser(self, default_fields, idx_name=LATEST_REVS):
        """
        Build a query parser for a list of default fields.
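
        Example (a sketch; ``imw`` is an IndexingMiddleware instance)::

            qp = imw.query_parser([NAME, CONTENT])
            q = qp.parse(u"username:JoeDoe")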
        """
        schema = self.schemas[idx_name]
        if len(default_fields) > 1:
            qp = MultifieldParser(default_fields, schema=schema)
        elif len(default_fields) == 1:
            qp = QueryParser(default_fields[0], schema=schema)
        else:
            raise ValueError("default_fields list must at least contain one field name")
        qp.add_plugin(RegexPlugin())

        def userid_pseudo_field_factory(fieldname):
            """generate a translator function, that searches for the userid
               in the given fieldname when provided with the username
            """
            def userid_pseudo_field(node):
                username = node.text
                users = user.search_users(**{NAME_EXACT: username})
                if users:
                    userid = users[0].meta[ITEMID]
                    node = WordNode(userid)
                    node.set_fieldname(fieldname)
                    return node
                return node
            return userid_pseudo_field
        qp.add_plugin(PseudoFieldPlugin(dict(
            # username:JoeDoe searches for revisions modified by JoeDoe
            username=userid_pseudo_field_factory(keys.USERID),
            # assigned:JoeDoe searches for tickets assigned to JoeDoe
            assigned=userid_pseudo_field_factory('assigned_to'),  # XXX should be keys.ASSIGNED_TO
        )))
        return qp

    def search(self, q, idx_name=LATEST_REVS, **kw):
        """
        Search with query q, yield Revisions.
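
        Example (a sketch, continuing the query_parser example above)::

            for rev in imw.search(q, limit=None):
                do_something(rev.meta, rev.name)  # do_something is a placeholder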
        """
        with self.ix[idx_name].searcher() as searcher:
            # Note: callers must consume everything we yield, so the for loop
            # ends and the "with" is left to close the index files.
            for hit in searcher.search(q, **kw):
                doc = hit.fields()
                latest_doc = doc if idx_name == LATEST_REVS else None
                item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID])
                yield item.get_revision(doc[REVID], doc=doc)

    def search_page(self, q, idx_name=LATEST_REVS, pagenum=1, pagelen=10, **kw):
        """
        Same as search, but with paging support.
        """
        with self.ix[idx_name].searcher() as searcher:
            # Note: callers must consume everything we yield, so the for loop
            # ends and the "with" is left to close the index files.
            for hit in searcher.search_page(q, pagenum, pagelen=pagelen, **kw):
                doc = hit.fields()
                latest_doc = doc if idx_name == LATEST_REVS else None
                item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID])
                yield item.get_revision(doc[REVID], doc=doc)

    def documents(self, idx_name=LATEST_REVS, **kw):
        """
        Yield Revisions matching the kw args.
        """
        for doc in self._documents(idx_name, **kw):
            latest_doc = doc if idx_name == LATEST_REVS else None
            item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID])
            yield item.get_revision(doc[REVID], doc=doc)

    def _documents(self, idx_name=LATEST_REVS, **kw):
        """
        Yield documents matching the kw args (internal use only).

        If no kw args are given, this yields all documents.
        """
        with self.ix[idx_name].searcher() as searcher:
            # Note: callers must consume everything we yield, so the for loop
            # ends and the "with" is left to close the index files.
            for doc in searcher.documents(**kw):
                yield doc

    def document(self, idx_name=LATEST_REVS, **kw):
        """
        Return a Revision matching the kw args.
        """
        doc = self._document(idx_name, **kw)
        if doc:
            latest_doc = doc if idx_name == LATEST_REVS else None
            item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID])
            return item.get_revision(doc[REVID], doc=doc)

    def _document(self, idx_name=LATEST_REVS, **kw):
        """
        Return a document matching the kw args (internal use only).
        """
        with self.ix[idx_name].searcher() as searcher:
            return searcher.document(**kw)

    def has_item(self, name):
        item = self[name]
        return bool(item)

    def __getitem__(self, name):
        """
        Return item with <name> (may be a new or existing item).
        """
        return Item(self, **{NAME_EXACT: name})

    def get_item(self, **query):
        """
        Return item identified by the query (may be a new or existing item).

        :kwargs **query: e.g. name_exact=u"Foo" or itemid="..." or ...
                         (must be a unique fieldname=value for the latest-revs index)
        """
        return Item(self, **query)

    def create_item(self, **query):
        """
        Return item identified by the query (must be a new item).

        :kwargs **query: e.g. name_exact=u"Foo" or itemid="..." or ...
                         (must be a unique fieldname=value for the latest-revs index)
        """
        return Item.create(self, **query)

    def existing_item(self, **query):
        """
        Return item identified by query (must be an existing item).

        :kwargs **query: e.g. name_exact=u"Foo" or itemid="..." or ...
                         (must be a unique fieldname=value for the latest-revs index)
        """
        return Item.existing(self, **query)


class Item(object):
    def __init__(self, indexer, latest_doc=None, **query):
        """
        :param indexer: indexer middleware instance
        :param latest_doc: if the caller already has a latest-revs index whoosh
                           document, it can be given here, to avoid fetching the
                           same doc again from the index
        :kwargs **query: any unique fieldname=value for the latest-revs index, e.g.:
                         name_exact="foo" or itemid="....." to fetch the item's current
                         doc from the index (if not given via latest_doc).
        """
        self.indexer = indexer
        self.backend = self.indexer.backend
        self._name = query.get(NAME_EXACT)
        if latest_doc is None:
            # we need to call the method without acl check to avoid endless recursion:
            latest_doc = self.indexer._document(**query)
            if latest_doc is None:
                # no such item, create a dummy doc that has a NAME entry to
                # avoid issues in the name(s) property code. if this was a
                # lookup for some specific item (using a name_exact query), we
                # put that name into the NAME list, otherwise it'll be empty:
                if self._name is not None:
                    names = [self._name, ]
                else:
                    names = []
                latest_doc = {NAME: names}
        self._current = latest_doc

    def _get_itemid(self):
        return self._current.get(ITEMID)

    def _set_itemid(self, value):
        self._current[ITEMID] = value
    itemid = property(_get_itemid, _set_itemid)

    @property
    def acl(self):
        return self._current.get(ACL)

    @property
    def namespace(self):
        return self._current.get(NAMESPACE)

    @property
    def ptime(self):
        dt = self._current.get(PTIME)
        if dt is not None:
            return utctimestamp(dt)

    @property
    def names(self):
        return get_names(self._current)

    @property
    def parentnames(self):
        """
        compute list of parent names (same order as in names, but no dupes)

        :return: parent names (list of unicode)
        """
        parent_names = []
        for name in self.names:
            parentname_tail = name.rsplit('/', 1)
            if len(parentname_tail) == 2:
                parent_name = parentname_tail[0]
                if parent_name not in parent_names:
                    parent_names.append(parent_name)
        return parent_names

    @property
    def parentids(self):
        """
        compute list of parent itemids

        :return: parent itemids (set)
        """
        parent_ids = set()
        for parent_name in self.parentnames:
            rev = self.indexer._document(idx_name=LATEST_REVS, **{NAME_EXACT: parent_name})
            if rev:
                parent_ids.add(rev[ITEMID])
        return parent_ids

    @property
    def mtime(self):
        dt = self._current.get(MTIME)
        if dt is not None:
            return utctimestamp(dt)

    @property
    def name(self):
        if self._name and self._name in self.names:
            name = self._name
        else:
            try:
                name = self.names[0]
            except IndexError:
                # empty name list, no name:
                name = None
        assert name is None or isinstance(name, unicode)
        return name

    def _fqname(self, name):
        """
        return the fully qualified name including the namespace: NS:NAME
        """
        ns = self.namespace
        name = name or u''
        if ns:
            fqn = ns + u':' + name
        else:
            fqn = name
        assert isinstance(fqn, unicode)
        return fqn

    @property
    def fqname(self):
        """
        return the fully qualified name including the namespace: NS:NAME
        """
        return self._fqname(self.name)

    @property
    def fqnames(self):
        """
        return the fully qualified names including the namespace: NS:NAME
        """
        return [self._fqname(name) for name in self.names]

    @property
    def fqparentnames(self):
        """
        return the fully qualified parent names including the namespace: NS:NAME
        """
        return [self._fqname(name) for name in self.parentnames]

    @classmethod
    def create(cls, indexer, **query):
        """
        Create a new item and return it, raise exception if it already exists.
        """
        item = cls(indexer, **query)
        if not item:
            return item
        raise ItemAlreadyExistsError(repr(query))

    @classmethod
    def existing(cls, indexer, **query):
        """
        Get an existing item and return it, raise exception if it does not exist.
        """
        item = cls(indexer, **query)
        if item:
            return item
        raise NoSuchItemError(repr(query))

    def __nonzero__(self):
        """
        Item exists (== has at least one revision)?
        """
        return self.itemid is not None

    def iter_revs(self):
        """
        Iterate over Revisions belonging to this item.
        """
        if self:
            for rev in self.indexer.documents(idx_name=ALL_REVS, itemid=self.itemid):
                yield rev

    def __getitem__(self, revid):
        """
        Get Revision with revision id <revid>.
        """
        return Revision(self, revid)

    def get_revision(self, revid, doc=None):
        """
        Similar to item[revid], but you can optionally give an already existing
        whoosh result document for the given revid to avoid backend accesses for some use cases.
        """
        return Revision(self, revid, doc)

    def preprocess(self, meta, data):
        """
        preprocess a revision before it gets stored and put into index.
        """
        content = convert_to_indexable(meta, data, self.name, is_new=True)
        return meta, data, content

    def store_revision(self, meta, data, overwrite=False,
                       trusted=False,  # True for loading a serialized representation or other trusted sources
                       name=None,  # TODO name we decoded from URL path
                       action=u'SAVE',
                       remote_addr=None,
                       userid=None,
                       wikiname=None,
                       contenttype_current=None,
                       contenttype_guessed=None,
                       acl_parent=None,
                       return_rev=False,
                       ):
        """
        Store a revision into the backend, write metadata and data to it.

        Usually this will be a new revision, either of an existing item or
        a new item. With overwrite mode, we can also store over existing
        revisions.

        :type meta: dict
        :type data: open file (file must be closed by caller)
        :param overwrite: if True, allow overwriting of existing revs.
        :param return_rev: if True, return a Revision instance of the just created revision
        :returns: a Revision instance or None
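
        Example (a sketch; StringIO stands in for the open data file)::

            from StringIO import StringIO
            item = imw[u'Foo']  # imw: the IndexingMiddleware instance
            rev = item.store_revision({NAME: [u'Foo'],
                                       CONTENTTYPE: u'text/plain;charset=utf-8'},
                                      StringIO('some data'), return_rev=True)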
        """
        if remote_addr is None:
            try:
                # if we get here outside a request, this won't work:
                remote_addr = unicode(request.remote_addr)
            except Exception:
                pass
        if userid is None:
            try:
                # if we get here outside a request, this won't work:
                userid = flaskg.user.valid and flaskg.user.itemid or None
            except Exception:
                pass
        if wikiname is None:
            wikiname = app.cfg.interwikiname
        state = {'trusted': trusted,
                 keys.NAME: [name],
                 keys.ACTION: action,
                 keys.ADDRESS: remote_addr,
                 keys.USERID: userid,
                 keys.WIKINAME: wikiname,
                 keys.NAMESPACE: None,
                 keys.ITEMID: self.itemid,  # real itemid or None
                 'contenttype_current': contenttype_current,
                 'contenttype_guessed': contenttype_guessed,
                 'acl_parent': acl_parent,
                }
        ct = meta.get(keys.CONTENTTYPE)
        if ct == CONTENTTYPE_USER:
            Schema = UserMetaSchema
        else:
            Schema = ContentMetaSchema
        m = Schema(meta)
        valid = m.validate(state)
        # TODO: currently we just log validation results. in the end we should
        # reject invalid stuff in some comfortable way.
        if not valid:
            logging.warning("metadata validation failed, see below")
            for e in m.children:
                logging.warning("{0}, {1}".format(e.valid, e))

        # we do not have anything in m that is not defined in the schema,
        # e.g. user-defined meta keys or stuff we do not validate. thus, we
        # just update the meta dict with the validated stuff:
        meta.update(dict(m.value.items()))
        # we do not want None / empty values:
        # XXX do not kick out empty lists before fixing NAME processing:
        meta = dict([(k, v) for k, v in meta.items() if v not in [None, ]])

        if self.itemid is None:
            self.itemid = meta[ITEMID]
        backend = self.backend
        if not overwrite:
            revid = meta.get(REVID)
            if revid is not None and revid in backend:
                raise ValueError('need overwrite=True to overwrite existing revisions')
        meta, data, content = self.preprocess(meta, data)
        data.seek(0)  # rewind file
        backend_name, revid = backend.store(meta, data)
        meta[REVID] = revid
        self.indexer.index_revision(meta, content, backend_name)
        if not overwrite:
            self._current = self.indexer._document(revid=revid)
        if return_rev:
            return Revision(self, revid)

    def store_all_revisions(self, meta, data):
        """
        Store over all revisions of this item.
        """
        for rev in self.iter_revs():
            meta[REVID] = rev.revid
            self.store_revision(meta, data, overwrite=True)

    def destroy_revision(self, revid):
        """
        Destroy revision <revid>.
        """
        rev = Revision(self, revid)
        self.backend.remove(rev.backend_name, revid)
        self.indexer.remove_revision(revid)

    def destroy_all_revisions(self):
        """
        Destroy all revisions of this item.
        """
        for rev in self.iter_revs():
            self.destroy_revision(rev.revid)


class Revision(object):
    """
    An existing revision (exists in the backend).
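
    Can be used as a context manager, so the data file gets closed (a sketch;
    ``item`` and ``some_revid`` are placeholders)::

        with item.get_revision(some_revid) as rev:
            process(rev.meta, rev.data)  # process is a placeholder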
    """
    def __init__(self, item, revid, doc=None, name=None):
        is_current = revid == CURRENT
        if doc is None:
            if is_current:
                doc = item._current
            else:
                doc = item.indexer._document(idx_name=ALL_REVS, revid=revid)
                if doc is None:
                    raise KeyError
        if is_current:
            revid = doc.get(REVID)
            if revid is None:
                raise KeyError
        self.item = item
        self.revid = revid
        self.backend = item.backend
        self.backend_name = doc[BACKENDNAME]
        self._doc = doc
        self.meta = Meta(self, self._doc)
        self._data = None
        if name and name in self.names:
            self._name = name
        else:
            self._name = None
        # Note: this does not immediately raise a KeyError for non-existing revs any more
        # If you access data or meta, it will, though.

    @property
    def names(self):
        return get_names(self.meta)

    @property
    def name(self):
        name = self._name
        if name is None:
            try:
                name = self.names[0]
            except IndexError:
                # empty name list, no name:
                name = None
        return name

    def set_context(self, context):
        for name in self.names:
            if name.startswith(context):
                self._name = name
                return

    def _load(self):
        meta, data = self.backend.retrieve(self.backend_name, self.revid)  # raises KeyError if rev does not exist
        self.meta = Meta(self, self._doc, meta)
        self._data = data
        return meta, data

    @property
    def data(self):
        if self._data is None:
            self._load()
        return self._data

    def close(self):
        if self._data is not None:
            self._data.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        self.close()

    def __cmp__(self, other):
        return cmp(self.meta, other.meta)


from collections import Mapping


class Meta(Mapping):
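    """
    A dict-like (read-only mapping) view of revision metadata.

    Lookups are served from the whoosh result document where possible (cheap)
    and fall back to loading the real metadata from the backend (via
    Revision._load) for keys the index does not have stored.
    """
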
    def __init__(self, revision, doc, meta=None):
        self.revision = revision
        self._doc = doc or {}
        self._meta = meta or {}
        self._common_fields = revision.item.indexer.common_fields

    def __contains__(self, key):
        try:
            self[key]
        except KeyError:
            return False
        else:
            return True

    def __iter__(self):
        self._meta, _ = self.revision._load()
        return iter(self._meta)

    def __getitem__(self, key):
        if self._meta:
            # we have real metadata (e.g. from storage)
            return self._meta[key]
        elif self._doc and key in self._common_fields:
            # we have a result document from whoosh, which has quite a lot
            # of the usually wanted metadata, avoid storage access, use this.
            value = self._doc[key]
            if key in [MTIME, PTIME]:
                # whoosh has a datetime object, but we want a UNIX timestamp
                value = utctimestamp(value)
            return value
        else:
            self._meta, _ = self.revision._load()
            return self._meta[key]

    def __cmp__(self, other):
        if self[REVID] == other[REVID]:
            return 0
        return cmp(self[MTIME], other[MTIME])

    def __len__(self):
        return 0  # XXX

    def __repr__(self):
        return "Meta _doc: {0!r} _meta: {1!r}".format(self._doc, self._meta)