|  |  | 
|  | #import "MultipartFormDataParser.h" | 
|  | #import "DDData.h" | 
|  | #import "HTTPLogging.h" | 
|  |  | 
#pragma mark log level

// Log level consumed by the HTTPLog* macros used in this file.
// NOTE(review): both build configurations currently select the same level;
// the #ifdef is kept so the two can be tuned independently.
#ifdef DEBUG
static const int httpLogLevel = HTTP_LOG_LEVEL_WARN;
#else
static const int httpLogLevel = HTTP_LOG_LEVEL_WARN;
#endif

// printf conversion for NSInteger-sized values. NSInteger is `long` on every
// LP64 target (x86_64 and arm64 alike), so key off __LP64__ rather than a
// single architecture macro.
#ifdef __LP64__
#define FMTNSINT "li"
#else
#define FMTNSINT "i"
#endif
|  |  | 
|  |  | 
//-----------------------------------------------------------------
// MultipartFormDataParser private interface (class extension)
//-----------------------------------------------------------------


// A class extension (rather than a named category) lets the compiler verify
// that every method declared here is implemented in the main @implementation.
@interface MultipartFormDataParser ()

/// Returns `data` decoded according to `encoding` (a contentTransferEncoding_* value).
+ (NSData*) decodedDataFromData:(NSData*) data encoding:(int) encoding;

/// Returns the offset of the first CRLFCRLF at or after `offset`, or -1 if none is present yet.
- (int) findHeaderEnd:(NSData*) workingData fromOffset:(int) offset;
/// Returns the offset of the first part delimiter at or after `offset`, or -1 if none is present.
- (int) findContentEnd:(NSData*) data fromOffset:(int) offset;

/// Returns how many trailing bytes of the first `length` bytes of `data` must be held back
/// so that a base64 / quoted-printable sequence is not split between chunks.
- (int) numberOfBytesToLeavePendingWithData:(NSData*) data length:(NSUInteger) length encoding:(int) encoding;
/// Returns the offset just past the first CRLF at or after `offset`, or -1 if none is present.
- (int) offsetTillNewlineSinceOffset:(int) offset inData:(NSData*) data;

/// Looks for the first boundary; returns the offset just past it, or -1 if more data is needed.
- (int) processPreamble:(NSData*) workingData;

@end
|  |  | 
|  |  | 
|  | //----------------------------------------------------------------- | 
|  | // implementation MultipartFormDataParser | 
|  | //----------------------------------------------------------------- | 
|  |  | 
|  |  | 
|  | @implementation MultipartFormDataParser | 
|  | @synthesize delegate,formEncoding; | 
|  |  | 
/**
 * Creates a parser for a multipart body delimited by `boundary`.
 *
 * @param boundary       The boundary parameter of the Content-Type header. Must be a
 *                       non-empty string representable in ASCII.
 * @param _formEncoding  String encoding handed to each part's header parser.
 * @return The parser, or nil when the boundary is missing, empty or not ASCII-encodable.
 */
- (id) initWithBoundary:(NSString*) boundary formEncoding:(NSStringEncoding) _formEncoding {
    self = [super init];
    if( nil == self ) {
        return nil;
    }

    // Messaging nil yields 0, so this rejects both a nil and an empty boundary.
    if( 0 == boundary.length ) {
        HTTPLogWarn(@"MultipartFormDataParser: init with zero boundary");
        return nil;
    }

    // Parts are separated by CRLF "--" boundary; keep that whole delimiter as bytes.
    NSData* delimiter = [[@"\r\n--" stringByAppendingString:boundary] dataUsingEncoding:NSASCIIStringEncoding];
    if( nil == delimiter ) {
        // dataUsingEncoding: returns nil when the string cannot be converted losslessly.
        // Failing here is better than handing out a parser that rejects every appendData: call.
        HTTPLogWarn(@"MultipartFormDataParser: boundary is not representable in ASCII");
        return nil;
    }
    boundaryData = delimiter;

    pendingData = [[NSMutableData alloc] init];
    currentEncoding = contentTransferEncoding_binary;
    currentHeader = nil;

    formEncoding = _formEncoding;
    reachedEpilogue = NO;
    processedPreamble = NO;
    checkForContentEnd = NO;
    waitingForCRLF = NO;

    return self;
}
|  |  | 
|  |  | 
// Replaces pendingData with the bytes of `workingData` starting at `offset`.
// `workingData` may be pendingData itself, so the tail is copied before it is stored.
- (void) storePendingTailOfData:(NSData*) workingData fromOffset:(NSUInteger) offset {
    NSUInteger length = workingData.length;
    if( offset > length ) {
        offset = length;
    }
    if( workingData == pendingData && 0 == offset ) {
        // pendingData already holds exactly the unhandled bytes.
        return;
    }
    NSData* tail = [NSData dataWithBytes:(const char*) workingData.bytes + offset length:length - offset];
    [pendingData setData:tail];
}


/**
 * Feeds the next chunk of the multipart body to the parser.
 *
 * The parser is a state machine driven by the flags processedPreamble, waitingForCRLF,
 * checkForContentEnd, reachedEpilogue and by currentHeader (nil while a part header is
 * expected). Bytes that cannot be handled yet are kept in pendingData and are prepended
 * to the next chunk. The delegate is told about preamble data, part start, part content,
 * part end and epilogue data, for each callback it implements.
 *
 * NSData objects handed to the delegate reference the parser's buffers without copying,
 * so they are only valid for the duration of the callback.
 *
 * @param data The newly received bytes.
 * @return NO when no boundary is configured or a part header cannot be parsed, YES otherwise.
 */
- (BOOL) appendData:(NSData *)data {
    // Can't parse without boundary;
    if( nil == boundaryData ) {
        HTTPLogError(@"MultipartFormDataParser: Trying to parse multipart without specifying a valid boundary");
        assert(false);
        return NO;
    }

    NSData* workingData = data;
    if( pendingData.length ) {
        [pendingData appendData:data];
        workingData = pendingData;
    }

    // `offset` is the position of the first unhandled byte in workingData. Before returning,
    // everything before it is dropped and only the unhandled rest is kept for the next call.
    int offset = 0;

    // Always hold back as many bytes as a delimiter is long, so a delimiter that is split
    // between two chunks is never mistaken for content.
    NSUInteger sizeToLeavePending = boundaryData.length;

    if( !reachedEpilogue && workingData.length <= sizeToLeavePending ) {
        // Not enough data to start parsing; accumulate it.
        if( !pendingData.length ) {
            [pendingData appendData:data];
        }
        if( !checkForContentEnd || pendingData.length < 2 ) {
            return YES;
        }
        const char* pendingBytes = (const char*) pendingData.bytes;
        if( pendingBytes[0] != '-' || pendingBytes[1] != '-' ) {
            // An ordinary delimiter: the rest of the boundary line still has to be skipped.
            checkForContentEnd = NO;
            waitingForCRLF = YES;
            return YES;
        }
        // "--" right after the boundary closes the multipart body; the rest is epilogue.
        HTTPLogVerbose(@"MultipartFormDataParser: End of multipart message");
        checkForContentEnd = NO;
        waitingForCRLF = YES;
        reachedEpilogue = YES;
        offset += 2;
    }

    while( true ) {
        const char* bytes = (const char*) workingData.bytes;
        NSUInteger length = workingData.length;

        if( checkForContentEnd ) {
            // Raised after each delimiter: decide whether it was the closing one.
            if( (NSUInteger) offset + 1 < length ) {
                if( bytes[offset] == '-' && bytes[offset + 1] == '-' ) {
                    HTTPLogVerbose(@"MultipartFormDataParser: End of multipart message");
                    reachedEpilogue = YES;
                    offset += 2;
                }
                // Either way, the remainder of the boundary line is skipped next.
                checkForContentEnd = NO;
                waitingForCRLF = YES;
            }
            else {
                // Fewer than two bytes left: keep them (at most one) and recheck on the next chunk.
                [self storePendingTailOfData:workingData fromOffset:(NSUInteger) offset];
                return YES;
            }
        }

        if( waitingForCRLF ) {
            // A boundary was read, but the end of its line has not been seen yet.
            int lineEnd = [self offsetTillNewlineSinceOffset:offset inData:workingData];
            if( -1 == lineEnd ) {
                // Only a trailing CR can be the start of the CRLF being waited for;
                // everything else on the boundary line is discarded.
                if( (NSUInteger) offset < length && bytes[length - 1] == '\r' ) {
                    [pendingData setData:[NSData dataWithBytes:bytes + length - 1 length:1]];
                }
                else {
                    [pendingData setData:[NSData data]];
                }
                return YES;
            }
            offset = lineEnd;
            waitingForCRLF = NO;
        }

        if( !processedPreamble ) {
            // The first boundary has to be found before any part begins.
            offset = [self processPreamble:workingData];
            if( -1 == offset ) {
                // wait for more data for preamble
                return YES;
            }
            // Loop again to skip the rest of the boundary line.
            continue;
        }

        if( reachedEpilogue ) {
            if( [delegate respondsToSelector:@selector(processEpilogueData:)] ) {
                NSUInteger start = MIN((NSUInteger) offset, length);
                NSData* epilogueData = [NSData dataWithBytesNoCopy:(void*)(bytes + start) length:length - start freeWhenDone:NO];
                [delegate processEpilogueData: epilogueData];
            }
            // Everything was handed over; drop it so it is not delivered again with the next chunk.
            [pendingData setLength:0];
            return YES;
        }

        if( nil == currentHeader ) {
            // nil == currentHeader is the "waiting for a part header" state; it is reset to
            // nil whenever a part ends. The header ends at the first CRLFCRLF.
            int headerEnd = [self findHeaderEnd:workingData fromOffset:offset];
            if( -1 == headerEnd ) {
                // The full header has not arrived yet; keep the unhandled bytes.
                [self storePendingTailOfData:workingData fromOffset:(NSUInteger) offset];
                return YES;
            }

            // Hand the header lines, including the CRLF ending the last one, to the header parser.
            NSData* headerData = [NSData dataWithBytesNoCopy:(void*)(bytes + offset) length:headerEnd + 2 - offset freeWhenDone:NO];
            currentHeader = [[MultipartMessageHeader alloc] initWithData:headerData formEncoding:formEncoding];

            if( nil == currentHeader ) {
                // we've found the data is in wrong format.
                HTTPLogError(@"MultipartFormDataParser: MultipartFormDataParser: wrong input format, coulnd't get a valid header");
                return NO;
            }
            if( [delegate respondsToSelector:@selector(processStartOfPartWithHeader:)] ) {
                [delegate processStartOfPartWithHeader:currentHeader];
            }
            HTTPLogVerbose(@"MultipartFormDataParser: MultipartFormDataParser: Retrieved part header.");

            // Skip the CRLFCRLF that terminates the header.
            offset = headerEnd + 4;
        }

        // Inside a part: look for the delimiter that ends it.
        int contentEnd = [self findContentEnd:workingData fromOffset:offset];

        if( -1 == contentEnd ) {
            // No delimiter yet, so the data belongs to the current part. Signed arithmetic:
            // fewer unhandled bytes than a delimiter is long must not wrap around.
            NSInteger available = (NSInteger) length - (NSInteger) offset - (NSInteger) sizeToLeavePending;
            NSInteger sizeToPass = 0;
            if( available > 0 ) {
                // For base64 / quoted-printable content, do not cut an encoded sequence in half.
                NSData* candidate = [NSData dataWithBytesNoCopy:(void*)(bytes + offset) length:(NSUInteger) available freeWhenDone:NO];
                int leaveTrailing = [self numberOfBytesToLeavePendingWithData:candidate length:(NSUInteger) available encoding:currentEncoding];
                if( leaveTrailing < 0 ) {
                    leaveTrailing = 0;
                }
                sizeToPass = available - leaveTrailing;
            }

            if( sizeToPass <= 0 ) {
                // wait for more data!
                [self storePendingTailOfData:workingData fromOffset:(NSUInteger) offset];
                return YES;
            }

            // Decode the chunk and let the delegate use it (store it in a file, for example).
            NSData* encodedChunk = [NSData dataWithBytesNoCopy:(void*)(bytes + offset) length:(NSUInteger) sizeToPass freeWhenDone:NO];
            NSData* decodedData = [MultipartFormDataParser decodedDataFromData:encodedChunk encoding:currentEncoding];

            if( [delegate respondsToSelector:@selector(processContent:WithHeader:)] ) {
                HTTPLogVerbose(@"MultipartFormDataParser: Processed %ld bytes of body", (long) sizeToPass);
                [delegate processContent: decodedData WithHeader:currentHeader];
            }

            // Keep everything that was not passed on: the held-back encoded tail plus
            // the possible start of a delimiter.
            [self storePendingTailOfData:workingData fromOffset:(NSUInteger) offset + (NSUInteger) sizeToPass];
            return YES;
        }

        // The delimiter was found: deliver the last chunk of the part, decoded the same way
        // as the intermediate chunks, then close the part.
        if( [delegate respondsToSelector:@selector(processContent:WithHeader:)] ) {
            NSData* finalChunk = [NSData dataWithBytesNoCopy:(void*)(bytes + offset) length:contentEnd - offset freeWhenDone:NO];
            [delegate processContent:[MultipartFormDataParser decodedDataFromData:finalChunk encoding:currentEncoding] WithHeader:currentHeader];
        }

        if( [delegate respondsToSelector:@selector(processEndOfPartWithHeader:)] ){
            [delegate processEndOfPartWithHeader:currentHeader];
            HTTPLogVerbose(@"MultipartFormDataParser: End of body part");
        }
        currentHeader = nil;

        // Continue right after the delimiter; the next iteration checks whether it was the
        // closing one and then skips the rest of the boundary line.
        offset = contentEnd + (int) boundaryData.length;
        checkForContentEnd = YES;
    }
}
|  |  | 
|  |  | 
|  | //----------------------------------------------------------------- | 
|  | #pragma mark private methods | 
|  |  | 
/**
 * Finds the CRLF that ends the boundary line.
 *
 * @param offset Position in `data` at which to start looking.
 * @param data   The buffer to scan.
 * @return The offset of the first byte after the CRLF, or -1 when no complete CRLF
 *         is present at or after `offset`.
 */
- (int) offsetTillNewlineSinceOffset:(int) offset inData:(NSData*) data {
    const char* bytes = (const char*) data.bytes;
    NSUInteger length = data.length;
    if( offset < 0 ) {
        return -1;
    }

    // `position + 1 < length` guarantees both compared bytes are inside the buffer
    // and does not wrap around when the buffer is empty.
    for( NSUInteger position = (NSUInteger) offset; position + 1 < length; position++ ) {
        if( bytes[position] == '\r' && bytes[position + 1] == '\n' ) {
            return (int)(position + 2);
        }
        // The boundary line might have any number of whitespaces before CRLF, according to rfc2046.
        // In debug, report anything else, as it hints at a misformatted message.
#ifdef DEBUG
        if( !isspace((unsigned char) bytes[position]) ) {
            HTTPLogWarn(@"MultipartFormDataParser: Warning, non-whitespace character '%c' between boundary bytes and CRLF in boundary line",bytes[position] );
        }
#endif
    }

    // no endl found within current data
    return -1;
}
|  |  | 
|  |  | 
/**
 * Consumes the preamble, i.e. everything in front of the first boundary.
 *
 * When the boundary is found, the bytes preceding it (if any) are passed to the delegate's
 * processPreambleData:, and processedPreamble and waitingForCRLF are raised. When it is not
 * found, all but the last boundary-length bytes are passed to the delegate and that tail is
 * kept in pendingData, so a boundary split between chunks is still recognised.
 *
 * @param data The buffer to scan from its beginning.
 * @return The offset just past the first boundary, or -1 when more data is needed.
 */
- (int) processPreamble:(NSData*) data {
    if( boundaryData.length < 2 ) {
        return -1;
    }
    // The first boundary is not preceded by CRLF, so drop the two leading delimiter bytes.
    NSData* firstBoundary = [boundaryData subdataWithRange:NSMakeRange(2, boundaryData.length - 2)];
    NSUInteger boundaryLength = firstBoundary.length;
    NSUInteger dataLength = data.length;
    const char* dataBytes = (const char*) data.bytes;

    NSRange match = NSMakeRange(NSNotFound, 0);
    if( boundaryLength > 0 && dataLength >= boundaryLength ) {
        match = [data rangeOfData:firstBoundary options:0 range:NSMakeRange(0, dataLength)];
    }

    if( NSNotFound == match.location ) {
        // the end of preamble wasn't found in this chunk
        if( dataLength > boundaryLength ) {
            NSUInteger preambleLength = dataLength - boundaryLength;
            if( [delegate respondsToSelector:@selector(processPreambleData:)] ) {
                NSData* preambleData = [NSData dataWithBytesNoCopy:(void*) dataBytes length:preambleLength freeWhenDone:NO];
                [delegate processPreambleData:preambleData];
                HTTPLogVerbose(@"MultipartFormDataParser: processed preamble");
            }
            // `data` may be pendingData itself: dataWithBytes: copies before setData: replaces.
            [pendingData setData:[NSData dataWithBytes:dataBytes + preambleLength length:boundaryLength]];
        }
        else if( data != pendingData ) {
            // Too short to contain a boundary; keep all of it for the next chunk.
            [pendingData setData:data];
        }
        return -1;
    }

    NSUInteger boundaryStart = match.location;
    if( boundaryStart > 0 && [delegate respondsToSelector:@selector(processPreambleData:)] ) {
        NSData* preambleData = [NSData dataWithBytesNoCopy:(void*) dataBytes length:boundaryStart freeWhenDone:NO];
        [delegate processPreambleData:preambleData];
    }

    // tells to skip CRLF after the boundary.
    processedPreamble = YES;
    waitingForCRLF = YES;
    return (int)(boundaryStart + boundaryLength);
}
|  |  | 
|  |  | 
|  |  | 
/**
 * Finds the blank line (CRLFCRLF) that terminates a part header.
 *
 * @param workingData The buffer to scan.
 * @param offset      Position at which to start looking.
 * @return The offset of the first byte of the CRLFCRLF sequence, or -1 when the sequence
 *         is not completely contained in the buffer yet.
 */
- (int) findHeaderEnd:(NSData*) workingData fromOffset:(int)offset {
    const char* bytes = (const char*) workingData.bytes;
    NSUInteger inputLength = workingData.length;
    if( offset < 0 ) {
        return -1;
    }

    // Four bytes are compared per position, so stop while four are still available.
    for( NSUInteger position = (NSUInteger) offset; position + 4 <= inputLength; position++ ) {
        if( bytes[position] == '\r' && bytes[position + 1] == '\n' &&
            bytes[position + 2] == '\r' && bytes[position + 3] == '\n' ) {
            return (int) position;
        }
    }
    // wait for more data
    return -1;
}
|  |  | 
|  |  | 
/**
 * Finds the delimiter (CRLF "--" boundary, as stored in boundaryData) that ends the
 * current part's content.
 *
 * @param data   The buffer to scan.
 * @param offset Position at which to start looking.
 * @return The offset of the first byte of the delimiter, or -1 when it does not occur
 *         at or after `offset`.
 */
- (int) findContentEnd:(NSData*) data fromOffset:(int) offset {
    NSUInteger dataLength = data.length;
    NSUInteger boundaryLength = boundaryData.length;
    if( offset < 0 || 0 == boundaryLength ) {
        return -1;
    }

    // Reject ranges that are out of bounds or too short to hold a delimiter, so the
    // search range passed below is always valid.
    NSUInteger start = (NSUInteger) offset;
    if( start > dataLength || dataLength - start < boundaryLength ) {
        return -1;
    }

    NSRange match = [data rangeOfData:boundaryData options:0 range:NSMakeRange(start, dataLength - start)];
    return ( NSNotFound == match.location ) ? -1 : (int) match.location;
}
|  |  | 
|  |  | 
/**
 * Computes how many bytes at the end of a content chunk have to be held back so that a
 * base64 group or a quoted-printable escape is not split between two chunks.
 *
 * @param data     Buffer whose first `length` bytes form the chunk about to be passed on.
 * @param length   Number of bytes considered; values larger than the buffer are clamped.
 * @param encoding A contentTransferEncoding_* value.
 * @return The number of trailing bytes to keep pending; 0 for encodings other than
 *         base64 and quoted-printable.
 */
- (int) numberOfBytesToLeavePendingWithData:(NSData*) data length:(NSUInteger) length encoding:(int) encoding {
    // Never look beyond the buffer, whatever length the caller computed.
    if( length > data.length ) {
        length = data.length;
    }
    const char* bytes = (const char*) data.bytes;

    if( encoding == contentTransferEncoding_base64 ) {
        // Find where the last line of the chunk starts: right after the last CRLF,
        // or at the beginning of the chunk when there is none.
        NSUInteger lineStart = 0;
        for( NSUInteger end = length; end >= 2; end-- ) {
            if( bytes[end - 2] == '\r' && bytes[end - 1] == '\n' ) {
                lineStart = end;
                break;
            }
        }
        // Base64 is decoded in groups of 4 characters: pass a multiple of 4 since the
        // line start and keep the remainder.
        return (int)((length - lineStart) % 4);
    }

    if( encoding == contentTransferEncoding_quotedPrintable ) {
        // An escape is "=XY"; with no more than 2 bytes nothing can be passed safely.
        if( length <= 2 ) {
            return (int) length;
        }
        // Hold back an escape that starts within the last two bytes.
        if( bytes[length - 2] == '=' ) {
            return 2;
        }
        if( bytes[length - 1] == '=' ) {
            return 1;
        }
        return 0;
    }
    return 0;
}
|  |  | 
|  |  | 
|  | //----------------------------------------------------------------- | 
|  | #pragma mark decoding | 
|  |  | 
|  |  | 
/**
 * Decodes part content according to its content transfer encoding.
 *
 * @param data     The encoded bytes.
 * @param encoding A contentTransferEncoding_* value.
 * @return The result of base64Decoded for base64, of decodedDataFromQuotedPrintableData:
 *         for quoted-printable, and `data` itself for every other encoding.
 */
+ (NSData*) decodedDataFromData:(NSData*) data encoding:(int) encoding {
    if( contentTransferEncoding_base64 == encoding ) {
        return [data base64Decoded];
    }
    if( contentTransferEncoding_quotedPrintable == encoding ) {
        return [self decodedDataFromQuotedPrintableData:data];
    }
    // Any other encoding is passed through untouched.
    return data;
}
|  |  | 
|  |  | 
// Returns the value (0-15) of a hexadecimal digit, or -1 if `c` is not one.
// Lowercase digits are accepted for robustness.
static int MultipartHexDigitValue(unsigned char c) {
    if( c >= '0' && c <= '9' ) {
        return c - '0';
    }
    if( c >= 'A' && c <= 'F' ) {
        return c - 'A' + 10;
    }
    if( c >= 'a' && c <= 'f' ) {
        return c - 'a' + 10;
    }
    return -1;
}


/**
 * Decodes quoted-printable data (http://tools.ietf.org/html/rfc2045#section-6.7).
 *
 * "=XY" with two hex digits becomes the byte 0xXY. "=" followed by CR (and an optional LF)
 * is a soft line break and produces no output. A "=" that starts neither of these is copied
 * literally. All other bytes are copied unchanged.
 *
 * @param data The quoted-printable encoded bytes.
 * @return The decoded bytes.
 */
+ (NSData*) decodedDataFromQuotedPrintableData:(NSData *)data {
    const unsigned char* bytes = (const unsigned char*) data.bytes;
    NSUInteger length = data.length;
    // Capacity, not length: the result must start out empty.
    NSMutableData* result = [NSMutableData dataWithCapacity:length];

    NSUInteger index = 0;
    while( index < length ) {
        if( bytes[index] != '=' ) {
            // Copy the whole run of literal bytes up to the next '=' at once.
            NSUInteger runStart = index;
            while( index < length && bytes[index] != '=' ) {
#ifdef DEBUG
                if( bytes[index] > 126 ) {
                    HTTPLogWarn(@"MultipartFormDataParser: Warning, character with code above 126 appears in quoted printable encoded data");
                }
#endif
                index++;
            }
            [result appendBytes:bytes + runStart length:index - runStart];
            continue;
        }

        // bytes[index] is '='; `remaining` counts the bytes that follow it.
        NSUInteger remaining = length - index - 1;

        // soft newline
        if( remaining >= 1 && bytes[index + 1] == '\r' ) {
            index += 2;
            if( index < length && bytes[index] == '\n' ) {
                index++;
            }
            continue;
        }

        if( remaining >= 2 ) {
            int high = MultipartHexDigitValue(bytes[index + 1]);
            int low = MultipartHexDigitValue(bytes[index + 2]);
            if( high >= 0 && low >= 0 ) {
                unsigned char decodedByte = (unsigned char)((high << 4) | low);
                [result appendBytes:&decodedByte length:1];
                index += 3;
                continue;
            }
        }
        else {
            HTTPLogWarn(@"MultipartFormDataParser: warning, trailing '=' in quoted printable data");
        }

        // Truncated or malformed escape: keep the '=' as it is and go on with the next byte.
        [result appendBytes:bytes + index length:1];
        index++;
    }
    return result;
}
|  |  | 
|  |  | 
|  | @end |