1. Joe Heck
  2. BSSAXTweetParserDemo

Source

BSSAXTweetParserDemo / BSSAXTweetParser.m

//
//  BSSAXTweetParser.m
//  BSSAXTweetParserDemo
//
//  Created by Brent Simmons on 6/3/10.
//  This code is public domain.
//

#import "BSSAXTweetParser.h"
#import "BSTweet.h"
#import "BSUser.h"


/* See this Apple sample code for where I got my start doing SAX + libxml:
 http://developer.apple.com/iphone/library/samplecode/XMLPerformance/Introduction/Intro.html */


/* Private class extension: parse state and helpers used only inside this file. */
@interface BSSAXTweetParser ()

/* Parse results. `tweets` is redeclared readwrite here (presumably readonly in
   the public header -- confirm). `currentTweet` is the tweet whose <status>
   element is currently open, or nil between tweets. */
@property (nonatomic, retain, readwrite) NSMutableArray *tweets;
@property (nonatomic, retain) BSTweet *currentTweet;

/* Raw bytes collected from SAX character callbacks for the element being read. */
@property (nonatomic, retain, readonly) NSMutableData *characters;

/* State flags toggled by the start/end element handlers. */
@property (nonatomic, assign) BOOL storingCharacters;
@property (nonatomic, assign) BOOL parsingTweet;
@property (nonatomic, assign) BOOL parsingUser;

/* Frees the libxml parser context, if there is one. */
- (void)freeParser;

@end


@implementation BSSAXTweetParser

/* Backing ivars share the property names (no underscore prefix); -init and
   -dealloc access them directly. */
@synthesize characters, storingCharacters, tweets, currentTweet, parsingTweet, parsingUser;


#pragma mark Init

/* Creates a parser with an empty character buffer and an empty tweets array. */
- (id)init {
	self = [super init];
	if (self == nil)
		return nil;

	/* Assigned straight to the ivars; both are owned here and released in -dealloc. */
	characters = [[NSMutableData alloc] init];
	tweets = [[NSMutableArray alloc] init];
	return self;
}


#pragma mark Dealloc

/* Frees the libxml context (if a parse was left open) and releases owned objects. */
- (void)dealloc {
	[self freeParser];

	[currentTweet release];
	[tweets release];
	[characters release];

	[super dealloc];
}


#pragma mark Parser public interface

/* One-shot convenience: parses a complete XML document held in memory.
   Starts a parse with the whole document as its only chunk, then ends the
   parse, which also frees the parser context.
   entireXMLDocument: the full XML document bytes. */
- (void)parseData:(NSData *)entireXMLDocument {
	[self startParsing:entireXMLDocument];
	[self endParsing];
}


/* Forward declaration: the handler table is defined at the bottom of this file,
   after the C callbacks it points to. */
static xmlSAXHandler saxHandlerStruct;

/* Begins an incremental (push) parse and feeds it the first chunk of data.
   Follow up with -parseChunk: for further data, then -endParsing or -abortParsing.
   initialChunk: the first bytes of the XML document. */
- (void)startParsing:(NSData *)initialChunk {
	/* If a previous parse was started but never ended or aborted, its context is
	   still alive; free it before overwriting the ivar so it does not leak. */
	[self freeParser];

	/* `self` is the SAX user data; the C callbacks cast it back to the parser. */
	xmlParserContext = xmlCreatePushParserCtxt(&saxHandlerStruct, self, NULL, 0, NULL);
	if (xmlParserContext == NULL)
		return; /* Context creation failed; there is nothing to configure or feed. */

	xmlCtxtUseOptions(xmlParserContext, XML_PARSE_NOENT);
	[self parseChunk:initialChunk];
}


/* Feeds one more chunk of document bytes to the current parser context.
   The final argument 0 tells libxml that more data may follow. */
- (void)parseChunk:(NSData *)chunk {
	const char *chunkBytes = (const char *)[chunk bytes];
	int chunkLength = (int)[chunk length]; /* xmlParseChunk takes an int size */
	xmlParseChunk(xmlParserContext, chunkBytes, chunkLength, 0);
}


/* Finishes the parse: sends an empty chunk with the terminate flag set, then
   frees the parser context. */
- (void)endParsing {
	const int terminate = 1;
	xmlParseChunk(xmlParserContext, NULL, 0, terminate);
	[self freeParser];
}


/* Abandons the current parse: calls xmlStopParser on the context and then
   frees it. Tweets already added to the tweets array are left in place.
   NOTE(review): the context is freed immediately, so this looks unsafe to call
   from inside a SAX callback while xmlParseChunk is still running -- confirm
   how callers use it. */
- (void)abortParsing {
	xmlStopParser(xmlParserContext);
	[self freeParser];
}


#pragma mark Parser

/* Frees the libxml parser context and clears the ivar. Does nothing when there
   is no context, so calling it repeatedly is harmless. */
- (void)freeParser {
	if (xmlParserContext == NULL)
		return;

	xmlFreeParserCtxt(xmlParserContext);
	xmlParserContext = NULL;
}


#pragma mark Character Buffer

/* Appends raw bytes to the character buffer, but only while storage is on.
   charactersFound: bytes reported by the SAX characters callback.
   length: number of bytes to append. */
- (void)appendCharacters:(const char *)charactersFound length:(NSInteger)length {
	if (![self storingCharacters])
		return;

	[[self characters] appendBytes:charactersFound length:length];
}


/* Empties the character buffer and turns character storage on, so the next
   run of text starts from a clean buffer. */
- (void)startStoringCharacters {
	[[self characters] setLength:0];
	[self setStoringCharacters:YES];
}


/* Turns character storage off and discards whatever the buffer holds. */
- (void)stopStoringCharactersAndMakeItEmpty {
	[[self characters] setLength:0];
	[self setStoringCharacters:NO];
}


/* Returns the buffered bytes decoded as a UTF-8 string (autoreleased),
   or nil when the buffer is empty. */
- (NSString *)currentString {
	NSMutableData *buffer = self.characters;
	if ([buffer length] == 0)
		return nil;

	NSString *decoded = [[NSString alloc] initWithData:buffer encoding:NSUTF8StringEncoding];
	return [decoded autorelease];
}


#pragma mark Tweets

/* Creates a new tweet, appends it to the tweets array and makes it the
   current tweet that subsequent end-element handling fills in. */
- (void)addTweet {
	BSTweet *newTweet = [[BSTweet alloc] init];
	[self.tweets addObject:newTweet];
	self.currentTweet = newTweet;
	[newTweet release]; /* The array and the property both retain it. */
}


#pragma mark SAX Callbacks

/* Element names and their comparison lengths.
   Each name is a char array so its length can be taken with sizeof, which
   counts the terminating NUL (strlen + 1). Comparing that many bytes with
   strncmp makes a match exact rather than a prefix match. */

/* Structural tags */
static const char kStatusTag[] = "status"; /* <status> starts a new tweet */
static const NSUInteger kStatusTagLength = sizeof(kStatusTag);
static const char kUserTag[] = "user"; /* <user> is inside a <status> structure */
static const NSUInteger kUserTagLength = sizeof(kUserTag);

/* Tags we care about */

static const char kCreatedAtTag[] = "created_at";
static const NSUInteger kCreatedAtTagLength = sizeof(kCreatedAtTag);
static const char kIDTag[] = "id";
static const NSUInteger kIDTagLength = sizeof(kIDTag);
static const char kTextTag[] = "text";
static const NSUInteger kTextTagLength = sizeof(kTextTag);
static const char kSourceTag[] = "source";
static const NSUInteger kSourceTagLength = sizeof(kSourceTag);
static const char kInReplyToStatusIDTag[] = "in_reply_to_status_id";
static const NSUInteger kInReplyToStatusIDTagLength = sizeof(kInReplyToStatusIDTag);
static const char kInReplyToUserIDTag[] = "in_reply_to_user_id";
static const NSUInteger kInReplyToUserIDTagLength = sizeof(kInReplyToUserIDTag);
static const char kInReplyToScreenNameTag[] = "in_reply_to_screen_name";
static const NSUInteger kInReplyToScreenNameTagLength = sizeof(kInReplyToScreenNameTag);
static const char kNameTag[] = "name";
static const NSUInteger kNameTagLength = sizeof(kNameTag);
static const char kScreenNameTag[] = "screen_name";
static const NSUInteger kScreenNameTagLength = sizeof(kScreenNameTag);
static const char kLocationTag[] = "location";
static const NSUInteger kLocationTagLength = sizeof(kLocationTag);
static const char kDescriptionTag[] = "description";
static const NSUInteger kDescriptionTagLength = sizeof(kDescriptionTag);
static const char kProfileImageURLTag[] = "profile_image_url";
static const NSUInteger kProfileImageURLTagLength = sizeof(kProfileImageURLTag);
static const char kURLTag[] = "url";
static const NSUInteger kURLTagLength = sizeof(kURLTag);
static const char kFollowersCountTag[] = "followers_count";
static const NSUInteger kFollowersCountTagLength = sizeof(kFollowersCountTag);

/* Non-zero when the first tagLength bytes of localName and tag are equal.
   Callers pass a tagLength that includes the terminating NUL, which makes this
   an exact match rather than a prefix match. Arguments and the whole expansion
   are parenthesized so the macro is safe inside larger expressions and with
   expression arguments. */
#define xmlEqualTags(localName, tag, tagLength) (!strncmp((const char *)(localName), (tag), (tagLength)))

/* Handles an end tag seen while inside <user>.
   </user> itself leaves user mode; any other recognized tag copies the
   buffered text into the matching field of the current tweet's user.
   Unrecognized tags are ignored.
   NOTE(review): -shouldStoreCharactersForTag: also buffers followers_count,
   but no branch here stores it -- confirm whether that is intentional. */
- (void)processUserTag:(const xmlChar *)localName {
	if (xmlEqualTags(localName, kUserTag, kUserTagLength)) {
		self.parsingUser = NO;
		return;
	}

	if (xmlEqualTags(localName, kIDTag, kIDTagLength)) {
		self.currentTweet.user.userID = [self currentString];
		return;
	}
	if (xmlEqualTags(localName, kNameTag, kNameTagLength)) {
		self.currentTweet.user.name = [self currentString];
		return;
	}
	if (xmlEqualTags(localName, kScreenNameTag, kScreenNameTagLength)) {
		self.currentTweet.user.screenName = [self currentString];
		return;
	}
	if (xmlEqualTags(localName, kLocationTag, kLocationTagLength)) {
		self.currentTweet.user.location = [self currentString];
		return;
	}
	if (xmlEqualTags(localName, kDescriptionTag, kDescriptionTagLength)) {
		self.currentTweet.user.userDescription = [self currentString];
		return;
	}
	if (xmlEqualTags(localName, kProfileImageURLTag, kProfileImageURLTagLength)) {
		self.currentTweet.user.profileImageURL = [self currentString];
		return;
	}
	if (xmlEqualTags(localName, kURLTag, kURLTagLength)) {
		self.currentTweet.user.url = [self currentString];
		return;
	}
}


/* Handles an end tag seen inside <status> but outside <user>.
   </status> finishes the current tweet; any other recognized tag copies the
   buffered text into the matching field of the current tweet.
   Unrecognized tags are ignored. */
- (void)processStatusTag:(const xmlChar *)localName {
	if (xmlEqualTags(localName, kStatusTag, kStatusTagLength)) {
		/* Ideally a delegate would be called here with the tweet that just
		   finished parsing, and the tweet would be released once the delegate
		   returns instead of being kept in an array. That avoids holding a
		   whole stream of tweets in memory -- useful on iPhone and iPad. */
		self.parsingTweet = NO;
		self.currentTweet = nil;
		return;
	}

	if (xmlEqualTags(localName, kCreatedAtTag, kCreatedAtTagLength)) {
		self.currentTweet.createdAt = [self currentString];
		return;
	}
	if (xmlEqualTags(localName, kIDTag, kIDTagLength)) {
		self.currentTweet.statusID = [self currentString];
		return;
	}
	if (xmlEqualTags(localName, kTextTag, kTextTagLength)) {
		self.currentTweet.text = [self currentString];
		return;
	}
	if (xmlEqualTags(localName, kSourceTag, kSourceTagLength)) {
		self.currentTweet.source = [self currentString];
		return;
	}
	if (xmlEqualTags(localName, kInReplyToStatusIDTag, kInReplyToStatusIDTagLength)) {
		self.currentTweet.inReplyToStatusID = [self currentString];
		return;
	}
	if (xmlEqualTags(localName, kInReplyToUserIDTag, kInReplyToUserIDTagLength)) {
		self.currentTweet.inReplyToUserID = [self currentString];
		return;
	}
	if (xmlEqualTags(localName, kInReplyToScreenNameTag, kInReplyToScreenNameTagLength)) {
		self.currentTweet.inReplyToScreenName = [self currentString];
		return;
	}
}


/* Decides whether the text of the element that just opened is worth buffering.
   Only a fixed set of tags is kept, chosen by the current mode; skipping the
   rest keeps parsing faster and memory use lower. User mode is checked first
   because <user> is nested inside <status>. Returns NO outside both modes. */
- (BOOL)shouldStoreCharactersForTag:(const xmlChar *)localName {
	if (self.parsingUser) {
		return xmlEqualTags(localName, kIDTag, kIDTagLength)
			|| xmlEqualTags(localName, kNameTag, kNameTagLength)
			|| xmlEqualTags(localName, kScreenNameTag, kScreenNameTagLength)
			|| xmlEqualTags(localName, kLocationTag, kLocationTagLength)
			|| xmlEqualTags(localName, kDescriptionTag, kDescriptionTagLength)
			|| xmlEqualTags(localName, kProfileImageURLTag, kProfileImageURLTagLength)
			|| xmlEqualTags(localName, kURLTag, kURLTagLength)
			|| xmlEqualTags(localName, kFollowersCountTag, kFollowersCountTagLength);
	}

	if (self.parsingTweet) {
		return xmlEqualTags(localName, kCreatedAtTag, kCreatedAtTagLength)
			|| xmlEqualTags(localName, kIDTag, kIDTagLength)
			|| xmlEqualTags(localName, kTextTag, kTextTagLength)
			|| xmlEqualTags(localName, kSourceTag, kSourceTagLength)
			|| xmlEqualTags(localName, kInReplyToStatusIDTag, kInReplyToStatusIDTagLength)
			|| xmlEqualTags(localName, kInReplyToUserIDTag, kInReplyToUserIDTagLength)
			|| xmlEqualTags(localName, kInReplyToScreenNameTag, kInReplyToScreenNameTagLength);
	}

	return NO;
}


/* Start-of-element handler.
   <status> enters tweet mode and creates a new current tweet; <user> enters
   user mode; any other tag starts character buffering if it is one we keep. */
- (void)xmlStartElement:(const xmlChar *)localName {
	if (xmlEqualTags(localName, kStatusTag, kStatusTagLength)) {
		self.parsingTweet = YES;
		[self addTweet];
		return;
	}

	if (xmlEqualTags(localName, kUserTag, kUserTagLength)) {
		self.parsingUser = YES;
		return;
	}

	if ([self shouldStoreCharactersForTag:localName])
		[self startStoringCharacters];
}


/* End-of-element handler.
   Routes the tag to the user or status handler depending on the current mode
   (user mode wins, since <user> is nested inside <status>), then always stops
   buffering and clears the character buffer. */
- (void)xmlEndElement:(const xmlChar *)localName {
	if ([self parsingUser]) {
		[self processUserTag:localName];
	}
	else if ([self parsingTweet]) {
		[self processStatusTag:localName];
	}

	[self stopStoringCharactersAndMakeItEmpty];
}


/* Character-data handler: forwards the bytes to the character buffer while
   storage is on, and drops them otherwise.
   ch: bytes reported by libxml. length: number of bytes at ch. */
- (void)xmlCharactersFound:(const xmlChar *)ch length:(int)length {
	if (![self storingCharacters])
		return;

	const char *bytes = (const char *)ch;
	[self appendCharacters:bytes length:length];
}


/* End-of-document handler. Intentionally does nothing in this demo. */
- (void)xmlEndDocument {
	; //You might or might not want to do something here, such as calling a delegate.
	//For bonus points, don't create a tweets array at all: call a delegate with each parsed tweet instead. That lowers memory use further.
}

@end


#pragma mark -
#pragma mark C SAX Callbacks

/*These end up calling Objective-C methods.*/

/* libxml start-element (namespace-aware) callback. `context` is the parser
   object that -startParsing: passed as user data. This demo uses only the
   local name and ignores the other arguments. */
static void startElementSAX(void *context,
							const xmlChar *localname,
							const xmlChar *prefix,
							const xmlChar *URI,
							int nb_namespaces,
							const xmlChar **namespaces,
							int nb_attributes,
							int nb_defaulted,
							const xmlChar **attributes) {
	BSSAXTweetParser *parser = (BSSAXTweetParser *)context;
	[parser xmlStartElement:localname];
}


/* libxml end-element (namespace-aware) callback. Forwards only the local name
   to the parser object carried in `context`. */
static void endElementSAX(void *context,
						  const xmlChar *localname,
						  const xmlChar *prefix,
						  const xmlChar *URI) {
	BSSAXTweetParser *parser = (BSSAXTweetParser *)context;
	[parser xmlEndElement:localname];
}


/* libxml characters callback. Hands the reported bytes and their count to the
   parser object carried in `context`. */
static void charactersFoundSAX(void *context, const xmlChar *ch, int len) {
	BSSAXTweetParser *parser = (BSSAXTweetParser *)context;
	[parser xmlCharactersFound:ch length:len];
}


/* libxml error callback. Prints a fixed prefix followed by the printf-style
   message to stdout. `context` is unused. */
static void errorEncounteredSAX(void *context, const char *msg, ...) {
	va_list arguments;

	fputs("OMGSAX.error: ", stdout);

	va_start(arguments, msg);
	vfprintf(stdout, msg, arguments);
	va_end(arguments);
}


/* libxml end-of-document callback. Notifies the parser object carried in `context`. */
static void endDocumentSAX(void *context) {
	BSSAXTweetParser *parser = (BSSAXTweetParser *)context;
	[parser xmlEndDocument];
}


/* SAX handler table handed to xmlCreatePushParserCtxt.
   Only the callbacks this parser uses are named; every field not listed is
   zero-initialized, i.e. a NULL callback. Designated initializers are used so
   the table does not depend on the field order of xmlSAXHandler, and so C
   function-pointer slots are not filled with the Objective-C `nil`.
   The same `error` callback receives all errors; fatalError is left unset. */
static xmlSAXHandler saxHandlerStruct = {
	.endDocument    = endDocumentSAX,
	.characters     = charactersFoundSAX,
	.error          = errorEncounteredSAX,
	.initialized    = XML_SAX2_MAGIC,      /* marks this as a SAX2 table so the *Ns element callbacks below are used */
	.startElementNs = startElementSAX,
	.endElementNs   = endElementSAX,
};