Commits

izazi committed d8b4b5f

Added the merge functionality to the DRPC query. Persisting the document contents to HDFS via avro

Comments (0)

Files changed (21)

dropAndCreateSchema.txt

-drop keyspace trident_test;
-create keyspace trident_test
+drop keyspace storm;
+create keyspace storm
     with strategy_options = [{replication_factor:1}]
     and placement_strategy = 'org.apache.cassandra.locator.SimpleStrategy';
 
-use trident_test;
-create column family tfid
+use storm;
+create column family tfidf
+    with comparator = AsciiType
+    and default_validation_class = 'UTF8Type'
+    and key_validation_class = 'UTF8Type';
+    
+use storm;
+create column family tfidfbatch
     with comparator = AsciiType
     and default_validation_class = 'UTF8Type'
     and key_validation_class = 'UTF8Type';

multilang/resources/cassandra-template.yaml

-# Cassandra storage config YAML
-cluster_name: '$CLUSTER$'
-initial_token: 0
-auto_bootstrap: false
-hinted_handoff_enabled: false
-max_hint_window_in_ms: 3600000 # one hour
-hinted_handoff_throttle_delay_in_ms: 50
-authenticator: org.apache.cassandra.auth.AllowAllAuthenticator
-authority: org.apache.cassandra.auth.AllowAllAuthority
-partitioner: org.apache.cassandra.dht.RandomPartitioner
-data_file_directories:
-    - $DIR$/data
-commitlog_directory: $DIR$/commitlog
-saved_caches_directory: $DIR$/saved_caches
-commitlog_sync: periodic
-commitlog_sync_period_in_ms: 10000
-seed_provider:
-    # Addresses of hosts that are deemed contact points. 
-    # Cassandra nodes use this list of hosts to find each other and learn
-    # the topology of the ring.  You must change this if you are running
-    # multiple nodes!
-    - class_name: org.apache.cassandra.locator.SimpleSeedProvider
-      parameters:
-          # seeds is actually a comma-delimited list of addresses.
-          - seeds: "127.0.0.1"
-flush_largest_memtables_at: 0.75
-reduce_cache_sizes_at: 0.85
-reduce_cache_capacity_to: 0.6
-concurrent_reads: 32
-concurrent_writes: 32
-memtable_flush_queue_size: 4
-storage_port: $STORAGE_PORT$
-listen_address: 127.0.0.1
-rpc_address: 127.0.0.1
-rpc_port: $PORT$
-rpc_keepalive: true
-thrift_framed_transport_size_in_mb: 15
-thrift_max_message_length_in_mb: 16
-incremental_backups: false
-snapshot_before_compaction: false
-column_index_size_in_kb: 64
-in_memory_compaction_limit_in_mb: 64
-compaction_throughput_mb_per_sec: 16
-compaction_preheat_key_cache: true
-rpc_timeout_in_ms: 10000
-endpoint_snitch: org.apache.cassandra.locator.SimpleSnitch
-dynamic_snitch: true
-dynamic_snitch_update_interval_in_ms: 100 
-dynamic_snitch_reset_interval_in_ms: 600000
-dynamic_snitch_badness_threshold: 0.0
-request_scheduler: org.apache.cassandra.scheduler.NoScheduler
-index_interval: 128