adium / adium
Adium is a free instant messaging application for Mac OS X that can connect to AIM, MSN, Jabber, Yahoo, and more. This is the current development repository of Adium from version 1.4 onwards.
| commit 3106: | f75c7bfd396b |
| parent 3105: | e864e707fb51 |
| branch: | default |
Restructure AHHyperlinkScanner for better synchronization to nextURI.
Changed (Δ519 bytes):
raw changeset »
Frameworks/AutoHyperlinks Framework/Source/AHHyperlinkScanner.h (6 lines added, 6 lines removed)
Frameworks/AutoHyperlinks Framework/Source/AHHyperlinkScanner.m (123 lines added, 118 lines removed)
Up to file-list Frameworks/AutoHyperlinks Framework/Source/AHHyperlinkScanner.h:
| … | … | @@ -44,12 +44,12 @@ extern void AH_delete_buffer(AH_BUFFER_S |
44 |
44 |
|
45 |
45 |
@interface AHHyperlinkScanner : NSObject <NSFastEnumeration> |
46 |
46 |
{ |
47 |
NSDictionary *m_urlSchemes; |
|
48 |
NSString *m_scanString; |
|
49 |
NSAttributedString *m_scanAttrString; |
|
50 |
BOOL m_strictChecking; |
|
51 |
unsigned long m_scanLocation; |
|
52 |
unsigned long m_scanStringLength; |
|
47 |
NSDictionary *m_urlSchemes; |
|
48 |
NSString *m_scanString; |
|
49 |
NSAttributedString *m_scanAttrString; |
|
50 |
BOOL m_strictChecking; |
|
51 |
unsigned long m_scanLocation; |
|
52 |
unsigned long m_scanStringLength; |
|
53 |
53 |
} |
54 |
54 |
|
55 |
55 |
Up to file-list Frameworks/AutoHyperlinks Framework/Source/AHHyperlinkScanner.m:
34 |
34 |
#define ENC_CHAR_KEY @"encChar" |
35 |
35 |
|
36 |
36 |
@interface AHHyperlinkScanner (PRIVATE) |
37 |
- (AHMarkedHyperlink *)nextURIFromLocation:(unsigned long *)scanLocation; |
|
37 |
38 |
- (NSRange)_longestBalancedEnclosureInRange:(NSRange)inRange; |
38 |
39 |
- (BOOL)_scanString:(NSString *)inString upToCharactersFromSet:(NSCharacterSet *)inCharSet intoRange:(NSRange *)outRangeRef fromIndex:(unsigned long *)idx; |
39 |
40 |
- (BOOL)_scanString:(NSString *)inString charactersFromSet:(NSCharacterSet *)inCharSet intoRange:(NSRange *)outRangeRef fromIndex:(unsigned long *)idx; |
125 |
126 |
} |
126 |
127 |
|
127 |
128 |
#pragma mark Init/Dealloc |
128 |
||
129 |
- (id)init |
|
130 |
{ |
|
131 |
if((self = [super init])){ |
|
132 |
m_scanLocation = 0; |
|
133 |
} |
|
134 |
return self; |
|
135 |
} |
|
129 |
136 |
|
130 |
137 |
- (id)initWithString:(NSString *)inString usingStrictChecking:(BOOL)flag |
131 |
138 |
{ |
136 |
143 |
@"ftp://", @"ftp", |
137 |
144 |
nil]; |
138 |
145 |
m_strictChecking = flag; |
139 |
m_scanLocation = 0; |
|
140 |
146 |
m_scanStringLength = [m_scanString length]; |
141 |
147 |
} |
142 |
148 |
return self; |
151 |
157 |
@"ftp://", @"ftp", |
152 |
158 |
nil]; |
153 |
159 |
m_strictChecking = flag; |
154 |
m_scanLocation = 0; |
|
155 |
160 |
m_scanStringLength = [m_scanString length]; |
156 |
161 |
} |
157 |
162 |
return self; |
174 |
179 |
|
175 |
180 |
+ (BOOL)isStringValidURI:(NSString *)inString usingStrict:(BOOL)useStrictChecking fromIndex:(unsigned long *)index withStatus:(AH_URI_VERIFICATION_STATUS *)validStatus |
176 |
181 |
{ |
177 |
|
|
182 |
AH_BUFFER_STATE buf; // buffer for flex to scan from |
|
178 |
183 |
yyscan_t scanner; // pointer to the flex scanner opaque type |
179 |
184 |
const char *inStringEnc; |
180 |
unsigned long encodedLength; |
|
181 |
||
185 |
unsigned long encodedLength; |
|
186 |
||
182 |
187 |
if(!validStatus){ |
183 |
188 |
AH_URI_VERIFICATION_STATUS newStatus = AH_URL_INVALID; |
184 |
189 |
validStatus = &newStatus; |
185 |
190 |
} |
186 |
191 |
|
187 |
192 |
*validStatus = AH_URL_INVALID; // assume the URL is invalid |
188 |
||
193 |
||
189 |
194 |
// Find the fastest 8-bit wide encoding possible for the c string |
190 |
195 |
NSStringEncoding stringEnc = [inString fastestEncoding]; |
191 |
196 |
if([@" " lengthOfBytesUsingEncoding:stringEnc] > 1U) |
192 |
197 |
stringEnc = NSUTF8StringEncoding; |
193 |
||
198 |
||
194 |
199 |
if (!(inStringEnc = [inString cStringUsingEncoding:stringEnc])) { |
195 |
200 |
return NO; |
196 |
201 |
} |
197 |
202 |
|
198 |
203 |
|
199 |
204 |
encodedLength = strlen(inStringEnc); // length of the string in utf-8 |
200 |
||
205 |
||
201 |
206 |
// initialize the buffer (flex automatically switches to the buffer in this function) |
202 |
207 |
AHlex_init(&scanner); |
203 |
buf = AH_scan_string(inStringEnc, scanner); |
|
204 |
||
205 |
// call flex to parse the input |
|
206 |
*validStatus = AHlex(scanner); |
|
208 |
buf = AH_scan_string(inStringEnc, scanner); |
|
209 |
||
210 |
// call flex to parse the input |
|
211 |
*validStatus = AHlex(scanner); |
|
207 |
212 |
if(index) *index += AHget_leng(scanner); |
208 |
213 |
|
209 |
// condition for valid URI's |
|
210 |
if(*validStatus == AH_URL_VALID || *validStatus == AH_MAILTO_VALID || *validStatus == AH_FILE_VALID){ |
|
211 |
AH_delete_buffer(buf, scanner); //remove the buffer from flex. |
|
212 |
buf = NULL; //null the buffer pointer for safety's sake. |
|
213 |
||
214 |
// check that the whole string was matched by flex. |
|
215 |
// this prevents silly things like "blah...com" from being seen as links |
|
216 |
if(AHget_leng(scanner) == encodedLength){ |
|
214 |
// condition for valid URI's |
|
215 |
if(*validStatus == AH_URL_VALID || *validStatus == AH_MAILTO_VALID || *validStatus == AH_FILE_VALID){ |
|
216 |
AH_delete_buffer(buf, scanner); //remove the buffer from flex. |
|
217 |
buf = NULL; //null the buffer pointer for safety's sake. |
|
218 |
||
219 |
// check that the whole string was matched by flex. |
|
220 |
// this prevents silly things like "blah...com" from being seen as links |
|
221 |
if(AHget_leng(scanner) == encodedLength){ |
|
217 |
222 |
AHlex_destroy(scanner); |
218 |
return YES; |
|
219 |
} |
|
223 |
return YES; |
|
224 |
} |
|
220 |
225 |
// condition for degenerate URL's (A.K.A. URI's sans specifiers), requires strict checking to be NO. |
221 |
}else if((*validStatus == AH_URL_DEGENERATE || *validStatus == AH_MAILTO_DEGENERATE) && !useStrictChecking){ |
|
222 |
AH_delete_buffer(buf, scanner); |
|
223 |
buf = NULL; |
|
224 |
if(AHget_leng(scanner) == encodedLength){ |
|
226 |
}else if((*validStatus == AH_URL_DEGENERATE || *validStatus == AH_MAILTO_DEGENERATE) && !useStrictChecking){ |
|
227 |
AH_delete_buffer(buf, scanner); |
|
228 |
buf = NULL; |
|
229 |
if(AHget_leng(scanner) == encodedLength){ |
|
225 |
230 |
AHlex_destroy(scanner); |
226 |
return YES; |
|
227 |
} |
|
231 |
return YES; |
|
232 |
} |
|
228 |
233 |
// if it ain't valid, and it ain't degenerate, then it's invalid. |
229 |
}else{ |
|
230 |
AH_delete_buffer(buf, scanner); |
|
231 |
buf = NULL; |
|
234 |
}else{ |
|
235 |
AH_delete_buffer(buf, scanner); |
|
236 |
buf = NULL; |
|
232 |
237 |
AHlex_destroy(scanner); |
233 |
return NO; |
|
234 |
} |
|
235 |
// default case, if the range checking above fails. |
|
238 |
return NO; |
|
239 |
} |
|
240 |
// default case, if the range checking above fails. |
|
236 |
241 |
AHlex_destroy(scanner); |
237 |
|
|
242 |
return NO; |
|
238 |
243 |
} |
239 |
244 |
|
240 |
245 |
#pragma mark Accessors |
241 |
246 |
|
242 |
- (AHMarkedHyperlink *)nextURI |
|
247 |
- (AHMarkedHyperlink *)nextURIFromLocation:(unsigned long * const)scanLocation |
|
243 |
248 |
{ |
244 |
NSRange scannedRange; |
|
245 |
unsigned long scannedLocation = m_scanLocation; |
|
249 |
NSRange scannedRange = NSMakeRange(0, 0); |
|
250 |
unsigned long scannedLocation = *scanLocation; |
|
246 |
251 |
|
247 |
// scan up to the next whitespace char so that we don't unnecessarily confuse flex |
|
248 |
// otherwise we end up validating urls that look like this "http://www.adium.im/ <--cool" |
|
252 |
// scan up to the next whitespace char so that we don't unnecessarily confuse flex |
|
253 |
// otherwise we end up validating urls that look like this "http://www.adium.im/ <--cool" |
|
249 |
254 |
[self _scanString:m_scanString charactersFromSet:startSet intoRange:nil fromIndex:&scannedLocation]; |
250 |
||
255 |
||
251 |
256 |
// main scanning loop |
252 |
257 |
while([self _scanString:m_scanString upToCharactersFromSet:skipSet intoRange:&scannedRange fromIndex:&scannedLocation]) { |
253 |
258 |
BOOL foundUnpairedEnclosureCharacter = NO; |
254 |
||
259 |
||
255 |
260 |
// Check for and filter enclosures. We can't add (, [, etc. to the skipSet as they may be in a URI |
256 |
261 |
if([enclosureSet characterIsMember:[m_scanString characterAtIndex:scannedRange.location]]){ |
257 |
262 |
unsigned long encIdx = [enclosureStartArray indexOfObject:[m_scanString substringWithRange:NSMakeRange(scannedRange.location, 1)]]; |
266 |
271 |
} |
267 |
272 |
} |
268 |
273 |
if(!scannedRange.length) break; |
269 |
||
274 |
||
270 |
275 |
// Find balanced enclosure chars |
271 |
276 |
NSRange longestEnclosure = [self _longestBalancedEnclosureInRange:scannedRange]; |
272 |
277 |
while (scannedRange.length > 2 && [endSet characterIsMember:[m_scanString characterAtIndex:(scannedRange.location + scannedRange.length - 1)]]) { |
276 |
281 |
}else break; |
277 |
282 |
} |
278 |
283 |
|
279 |
// if we have a valid URL then save the scanned string, and make an AHMarkedHyperlink out of it. |
|
280 |
// this way, we can preserve things like the matched string (to be converted to a NSURL), |
|
281 |
// parent string, its validation status (valid, file, degenerate, etc), and its range in the parent string |
|
284 |
// if we have a valid URL then save the scanned string, and make an AHMarkedHyperlink out of it. |
|
285 |
// this way, we can preserve things like the matched string (to be converted to a NSURL), |
|
286 |
// parent string, its validation status (valid, file, degenerate, etc), and its range in the parent string |
|
282 |
287 |
AH_URI_VERIFICATION_STATUS validStatus; |
283 |
288 |
NSString *_scanString = nil; |
284 |
289 |
if(3 < scannedRange.length) _scanString = [m_scanString substringWithRange:scannedRange]; |
285 |
||
286 |
if((3 < scannedRange.length) && [[self class] isStringValidURI:_scanString usingStrict:m_strictChecking fromIndex:&m_scanLocation withStatus:&validStatus]){ |
|
287 |
AHMarkedHyperlink *markedLink; |
|
290 |
||
291 |
if((3 < scannedRange.length) && [[self class] isStringValidURI:_scanString usingStrict:m_strictChecking fromIndex:scanLocation withStatus:&validStatus]){ |
|
292 |
AHMarkedHyperlink *markedLink; |
|
288 |
293 |
|
289 |
//insert typical specifiers if the URL is degenerate |
|
290 |
switch(validStatus){ |
|
291 |
case AH_URL_DEGENERATE: |
|
292 |
{ |
|
293 |
NSString *scheme = DEFAULT_URL_SCHEME; |
|
294 |
unsigned long i = 0; |
|
295 |
||
296 |
NSRange firstComponent; |
|
297 |
[self _scanString:_scanString |
|
298 |
upToCharactersFromSet:hostnameComponentSeparatorSet |
|
299 |
intoRange:&firstComponent |
|
300 |
fromIndex:&i]; |
|
301 |
||
302 |
if(NSNotFound != firstComponent.location) { |
|
303 |
NSString *hostnameScheme = [m_urlSchemes objectForKey:[_scanString substringWithRange:firstComponent]]; |
|
304 |
if(hostnameScheme) scheme = hostnameScheme; |
|
305 |
} |
|
306 |
||
307 |
_scanString = [scheme stringByAppendingString:_scanString]; |
|
308 |
||
309 |
break; |
|
310 |
} |
|
311 |
||
312 |
case AH_MAILTO_DEGENERATE: |
|
294 |
//insert typical specifiers if the URL is degenerate |
|
295 |
switch(validStatus){ |
|
296 |
case AH_URL_DEGENERATE: |
|
297 |
{ |
|
298 |
NSString *scheme = DEFAULT_URL_SCHEME; |
|
299 |
unsigned long i = 0; |
|
300 |
||
301 |
NSRange firstComponent; |
|
302 |
[self _scanString:_scanString |
|
303 |
upToCharactersFromSet:hostnameComponentSeparatorSet |
|
304 |
intoRange:&firstComponent |
|
305 |
fromIndex:&i]; |
|
306 |
||
307 |
if(NSNotFound != firstComponent.location) { |
|
308 |
NSString *hostnameScheme = [m_urlSchemes objectForKey:[_scanString substringWithRange:firstComponent]]; |
|
309 |
if(hostnameScheme) scheme = hostnameScheme; |
|
310 |
} |
|
311 |
||
312 |
_scanString = [scheme stringByAppendingString:_scanString]; |
|
313 |
||
314 |
break; |
|
315 |
} |
|
316 |
||
317 |
case AH_MAILTO_DEGENERATE: |
|
313 |
318 |
_scanString = [@"mailto:" stringByAppendingString:_scanString]; |
314 |
break; |
|
315 |
default: |
|
316 |
break; |
|
317 |
} |
|
318 |
||
319 |
//make a marked link |
|
320 |
markedLink = [AHMarkedHyperlink hyperlinkWithString:_scanString |
|
321 |
withValidationStatus:validStatus |
|
322 |
parentString:m_scanString |
|
323 |
andRange:scannedRange]; |
|
324 |
return [markedLink URL]? markedLink : nil; |
|
325 |
} |
|
326 |
||
319 |
break; |
|
320 |
default: |
|
321 |
break; |
|
322 |
} |
|
323 |
||
324 |
//make a marked link |
|
325 |
markedLink = [AHMarkedHyperlink hyperlinkWithString:_scanString |
|
326 |
withValidationStatus:validStatus |
|
327 |
parentString:m_scanString |
|
328 |
andRange:scannedRange]; |
|
329 |
return [markedLink URL]? markedLink : nil; |
|
330 |
} |
|
331 |
||
327 |
332 |
//step location after scanning a string |
328 |
333 |
if (foundUnpairedEnclosureCharacter){ |
329 |
|
|
334 |
(*scanLocation)++; |
|
330 |
335 |
}else{ |
331 |
336 |
NSRange startRange = [m_scanString rangeOfCharacterFromSet:puncSet options:NSLiteralSearch range:scannedRange]; |
332 |
337 |
if (startRange.location != NSNotFound) |
333 |
|
|
338 |
*scanLocation = startRange.location + startRange.length; |
|
334 |
339 |
else |
335 |
|
|
340 |
*scanLocation += scannedRange.length; |
|
336 |
341 |
} |
337 |
||
338 |
scannedLocation = m_scanLocation; |
|
339 |
} |
|
342 |
||
343 |
scannedLocation = *scanLocation; |
|
344 |
} |
|
340 |
345 |
|
341 |
// if we're here, then NSScanner hit the end of the string |
|
342 |
// set m_scanLocation to the string length here so we avoid potential infinite looping with many trailing spaces. |
|
343 |
m_scanLocation = m_scanStringLength; |
|
344 |
return nil; |
|
346 |
// if we're here, then NSScanner hit the end of the string |
|
347 |
// set *scanLocation to the string length here so we avoid potential infinite looping with many trailing spaces. |
|
348 |
*scanLocation = m_scanStringLength; |
|
349 |
return nil; |
|
350 |
} |
|
351 |
||
352 |
- (AHMarkedHyperlink *)nextURI |
|
353 |
{ |
|
354 |
@synchronized(self) { |
|
355 |
return [self nextURIFromLocation:&m_scanLocation]; |
|
356 |
} |
|
345 |
357 |
} |
346 |
358 |
|
347 |
359 |
-(NSArray *)allURIs |
348 |
360 |
{ |
349 |
NSMutableArray *rangeArray = [NSMutableArray array]; |
|
350 |
AHMarkedHyperlink *markedLink; |
|
351 |
unsigned long _holdOffset = m_scanLocation; // store location for later restoration; |
|
352 |
m_scanLocation = 0; //set the offset to 0. |
|
353 |
||
354 |
//build an array of marked links. |
|
355 |
|
|
361 |
NSMutableArray *rangeArray = [NSMutableArray array]; |
|
362 |
AHMarkedHyperlink *markedLink; |
|
363 |
unsigned long offset = 0; |
|
364 |
||
365 |
//build an array of marked links. |
|
366 |
while((markedLink = [self nextURIFromLocation:&offset])){ |
|
356 |
367 |
[rangeArray addObject:markedLink]; |
357 |
368 |
} |
358 |
m_scanLocation = _holdOffset; // reset scanLocation |
|
359 |
369 |
return rangeArray; |
360 |
370 |
} |
361 |
371 |
|
364 |
374 |
NSMutableAttributedString *linkifiedString; |
365 |
375 |
AHMarkedHyperlink *markedLink; |
366 |
376 |
BOOL _didFindLinks = NO; |
367 |
unsigned long _holdOffset = m_scanLocation; // store location for later restoration; |
|
368 |
||
369 |
m_scanLocation = 0; |
|
370 |
377 |
|
371 |
378 |
if(m_scanAttrString) { |
372 |
379 |
linkifiedString = [[m_scanAttrString mutableCopy] autorelease]; |
384 |
391 |
range:[markedLink range]]; |
385 |
392 |
} |
386 |
393 |
} |
387 |
||
388 |
m_scanLocation = _holdOffset; // reset scanLocation |
|
389 |
||
394 |
||
390 |
395 |
return _didFindLinks? linkifiedString : |
391 |
396 |
m_scanAttrString ? [[m_scanAttrString retain] autorelease] : [[[NSMutableAttributedString alloc] initWithString:m_scanString] autorelease]; |
392 |
397 |
} |
430 |
435 |
|
431 |
436 |
while(encScanLocation < inRange.length + inRange.location) { |
432 |
437 |
[self _scanString:m_scanString upToCharactersFromSet:enclosureSet intoRange:nil fromIndex:&encScanLocation]; |
433 |
||
438 |
||
434 |
439 |
if(encScanLocation >= (inRange.location + inRange.length)) break; |
435 |
||
440 |
||
436 |
441 |
matchChar = [m_scanString substringWithRange:NSMakeRange(encScanLocation, 1)]; |
437 |
||
442 |
||
438 |
443 |
if([enclosureStartArray containsObject:matchChar]) { |
439 |
444 |
encDict = [NSDictionary dictionaryWithObjects:[NSArray arrayWithObjects:[NSNumber numberWithUnsignedLong:encScanLocation], matchChar, nil] |
440 |
forKeys:encKeys]; |
|
441 |
if(!enclosureStack) enclosureStack = [NSMutableArray arrayWithCapacity:1]; |
|
445 |
forKeys:encKeys]; |
|
446 |
if(!enclosureStack) enclosureStack = [NSMutableArray array]; |
|
442 |
447 |
[enclosureStack addObject:encDict]; |
443 |
448 |
}else if([enclosureStopArray containsObject:matchChar]) { |
444 |
449 |
NSEnumerator *encEnumerator = [enclosureStack objectEnumerator]; |
447 |
452 |
unsigned long encStartIndex = [enclosureStartArray indexOfObjectIdenticalTo:[encDict objectForKey:ENC_CHAR_KEY]]; |
448 |
453 |
if([enclosureStopArray indexOfObjectIdenticalTo:matchChar] == encStartIndex) { |
449 |
454 |
NSRange encRange = NSMakeRange(encTagIndex, encScanLocation - encTagIndex + 1); |
450 |
if(!enclosureStack) enclosureStack = [NSMutableArray arrayWithCapacity:1]; |
|
451 |
if(!enclosureArray) enclosureArray = [NSMutableArray arrayWithCapacity:1]; |
|
455 |
if(!enclosureStack) enclosureStack = [NSMutableArray array]; |
|
456 |
if(!enclosureArray) enclosureArray = [NSMutableArray array]; |
|
452 |
457 |
[enclosureStack removeObject:encDict]; |
453 |
458 |
[enclosureArray addObject:NSStringFromRange(encRange)]; |
454 |
459 |
break; |
