
#import "MultipartFormDataParser.h"
#import "DDData.h"
#import "HTTPLogging.h"

#include <ctype.h>
#include <string.h>

#pragma mark log level

// File-local log level; presumably consumed by the HTTPLog* macros from HTTPLogging.h (confirm there).
// NOTE(review): both branches currently select HTTP_LOG_LEVEL_WARN, so DEBUG and
// release builds log at the same level. The #ifdef is kept so the DEBUG level can
// be raised without touching release builds.
#ifdef DEBUG
static const int httpLogLevel = HTTP_LOG_LEVEL_WARN;
#else
static const int httpLogLevel = HTTP_LOG_LEVEL_WARN;
#endif

// printf-style conversion (without the leading '%') matching the width of NSInteger.
// NSInteger is `long` on every LP64 target (x86_64, arm64, ...) and when
// NS_BUILD_32_LIKE_64 is set, and `int` otherwise, so the choice is keyed on the
// data model rather than on one particular CPU architecture.
#if (defined(__LP64__) && __LP64__) || (defined(NS_BUILD_32_LIKE_64) && NS_BUILD_32_LIKE_64)
#define FMTNSINT "li"
#else
#define FMTNSINT "i"
#endif


//-----------------------------------------------------------------
// interface MultipartFormDataParser (private)
//-----------------------------------------------------------------


// Private helpers of the parser. All of them are implemented in the main
// @implementation below.
@interface MultipartFormDataParser (private)
// Decodes a content chunk according to the given content-transfer-encoding
// (base64, quoted-printable, or anything else, which is returned as-is).
+ (NSData*) decodedDataFromData:(NSData*) data encoding:(int) encoding;

// Searches workingData, starting at offset, for the CRLFCRLF sequence that
// terminates a part header. Returns its offset, or -1 when it is not found.
- (int) findHeaderEnd:(NSData*) workingData fromOffset:(int) offset;
// Searches data, starting at offset, for the part boundary (CRLF + "--" + boundary).
// Returns the offset of the boundary, or -1 when it is not found.
- (int) findContentEnd:(NSData*) data fromOffset:(int) offset;

// Returns how many trailing bytes of an encoded chunk should be held back so that
// a base64 / quoted-printable unit is not split between two chunks.
// NOTE(review): the definition below declares `length` as int, not NSUInteger — confirm which is intended.
- (int) numberOfBytesToLeavePendingWithData:(NSData*) data length:(NSUInteger) length encoding:(int) encoding;
// Skips the rest of a boundary line: returns the offset just past the next CRLF
// found at or after offset, or -1 when no CRLF is available yet.
- (int) offsetTillNewlineSinceOffset:(int) offset inData:(NSData*) data;

// Looks for the first boundary (the one without a leading CRLF) and forwards the
// bytes before it to the delegate as preamble. Returns the offset just past the
// boundary, or -1 when more data is needed.
- (int) processPreamble:(NSData*) workingData;

@end


//-----------------------------------------------------------------
// implementation MultipartFormDataParser
//-----------------------------------------------------------------


@implementation MultipartFormDataParser 
@synthesize delegate,formEncoding;

// Creates a parser for a multipart body delimited by `boundary`.
//
// boundary      - the boundary parameter of the Content-Type header; nil makes
//                 the initializer log a warning and return nil.
// _formEncoding - string encoding handed to the part headers when they are parsed.
//
// The stored delimiter is CRLF + "--" + boundary, encoded as ASCII.
- (id) initWithBoundary:(NSString*) boundary formEncoding:(NSStringEncoding) _formEncoding {
    self = [super init];
    if( self == nil ) {
        return nil;
    }

    if( boundary == nil ) {
        HTTPLogWarn(@"MultipartFormDataParser: init with zero boundary");
        return nil;
    }

    // parser state: nothing consumed yet, waiting for the preamble and the first header
    processedPreamble = NO;
    reachedEpilogue = NO;
    currentHeader = nil;
    currentEncoding = contentTransferEncoding_binary;
    formEncoding = _formEncoding;

    // buffers
    pendingData = [[NSMutableData alloc] init];
    NSString* delimiter = [@"\r\n--" stringByAppendingString:boundary];
    boundaryData = [delimiter dataUsingEncoding:NSASCIIStringEncoding];

    return self;
}


// Feeds the next chunk of the multipart body to the parser.
//
// The parser is incremental: every call consumes as much of the chunk as it can,
// reports preamble / part headers / part content / epilogue to the delegate, and
// stores whatever cannot be interpreted yet in pendingData so that it is
// re-examined together with the next chunk. The parse state between calls lives in
// the flags processedPreamble, checkForContentEnd, waitingForCRLF, reachedEpilogue
// and in currentHeader (nil means "waiting for a part header").
//
// NSData objects handed to the delegate wrap the parser's internal buffer without
// copying; they are only valid for the duration of the delegate call.
//
// Returns NO when the parser has no boundary or a part header cannot be parsed,
// YES otherwise (including "need more data").
- (BOOL) appendData:(NSData *)data { 
    // Can't parse without boundary;
    if( nil == boundaryData ) {
        HTTPLogError(@"MultipartFormDataParser: Trying to parse multipart without specifying a valid boundary");
        assert(false);
        return NO;
    }
    NSData* workingData = data;

    if( pendingData.length ) {
        [pendingData appendData:data];
        workingData = pendingData;
    }

    // the parser saves parse state in the offset variable, which indicates the offset of the unhandled
    // part in the currently processed data. Before returning, we always drop all data up to offset,
    // leaving only the unhandled bytes for the next call.
    int offset = 0;

    // don't parse data unless its size is greater than the boundary length, so we can't
    // miss a boundary that got split between two chunks.
    NSUInteger sizeToLeavePending = boundaryData.length;

    if( !reachedEpilogue && workingData.length <= sizeToLeavePending )  {
        // not enough data even to start parsing: save it to pending data.
        if( !pendingData.length ) {
            [pendingData appendData:data];
        }
        if( !checkForContentEnd || pendingData.length < 2 ) {
            return YES;
        }
        const char* pendingBytes = (const char*) pendingData.bytes;
        if( pendingBytes[0] != '-' || pendingBytes[1] != '-' ) {
            // an ordinary boundary: the rest of the boundary line still has to be skipped.
            checkForContentEnd = NO;
            waitingForCRLF = YES;
            return YES;
        }
        // we found the multipart end. all coming next is an epilogue.
        HTTPLogVerbose(@"MultipartFormDataParser: End of multipart message");
        checkForContentEnd = NO;
        waitingForCRLF = YES;
        reachedEpilogue = YES;
        offset += 2;
    }

    while( true ) {
        if( checkForContentEnd ) {
            // the flag is raised after a boundary to check whether it was the closing one ("--" follows).
            if( (NSUInteger) offset + 1 < workingData.length ) {
                const char* bytes = (const char*) workingData.bytes;
                if( bytes[offset] == '-' && bytes[offset + 1] == '-' ) {
                    // we found the multipart end. all coming next is an epilogue.
                    HTTPLogVerbose(@"MultipartFormDataParser: End of multipart message");
                    checkForContentEnd = NO;
                    reachedEpilogue = YES;
                    // still wait for CRLF, that comes after boundary, but before epilogue.
                    waitingForCRLF = YES;
                    offset += 2;
                }
                else {
                    // it's not content end, we have to wait till separator line end before next part comes
                    waitingForCRLF = YES;
                    checkForContentEnd = NO;
                }
            }
            else {
                // we haven't got enough data to check for content end.
                // save current unhandled data (it may be 1 byte) to pending and recheck on next chunk received
                if( (NSUInteger) offset < workingData.length ) {
                    [pendingData setData:[NSData dataWithBytes:(const char*) workingData.bytes + workingData.length - 1 length:1]];
                }
                else {
                    // there is no unhandled data now, wait for more chunks
                    [pendingData setLength:0];
                }
                return YES;
            }
        }
        if( waitingForCRLF ) {

            // the flag is raised once a boundary has been read but the end of the
            // boundary line has not been found yet.

            offset = [self offsetTillNewlineSinceOffset:offset inData:workingData];
            if( -1 == offset ) {
                // didn't find the line end. Everything scanned belongs to the boundary line and is
                // dropped, except a trailing CR which may be the first half of the CRLF.
                NSUInteger workingLength = workingData.length;
                if( workingLength > 0 && ((const char*) workingData.bytes)[workingLength - 1] == '\r' ) {
                    [pendingData setData:[NSData dataWithBytes:(const char*) workingData.bytes + workingLength - 1 length:1]];
                }
                else {
                    [pendingData setLength:0];
                }
                return YES;
            }
            waitingForCRLF = NO;
        }
        if( !processedPreamble ) {
            // got to find the first boundary before the actual content begins.
            offset = [self processPreamble:workingData];
            // wait for more data for preamble
            if( -1 == offset ) 
                return YES;
            // invoke continue to skip newline after boundary.
            continue;
        }

        if( reachedEpilogue ) {
            // pass all epilogue data to delegate.
            if( [delegate respondsToSelector:@selector(processEpilogueData:)] ) {
                NSData* epilogueData = [NSData dataWithBytesNoCopy: (char*) workingData.bytes + offset length: workingData.length - offset freeWhenDone:NO];
                [delegate processEpilogueData: epilogueData];
            }
            // the whole buffer has been consumed; without this the same bytes would be
            // delivered again together with the next chunk.
            [pendingData setLength:0];
            return YES;
        }

        if( nil == currentHeader ) {
            // nil == currentHeader is a state flag, indicating we are waiting for header now.
            // whenever part is over, currentHeader is set to nil.

            // try to find CRLFCRLF bytes in the data, which indicates header end.
            // headers are not passed on piecewise, as they won't be too large.
            int headerEnd = [self findHeaderEnd:workingData fromOffset:offset];
            if( -1 == headerEnd ) {
                // didn't receive the full header yet: drop all handled data and keep the unhandled tail.
                if( offset || !pendingData.length ) {
                    [pendingData setData:[NSData dataWithBytes:(const char*) workingData.bytes + offset length:workingData.length - offset]];
                }
                return  YES;
            }

            // let the header parser do its job from now on.
            NSData * headerData = [NSData dataWithBytesNoCopy: (char*) workingData.bytes + offset length:headerEnd + 2 - offset freeWhenDone:NO];
            currentHeader = [[MultipartMessageHeader alloc] initWithData:headerData formEncoding:formEncoding];

            if( nil == currentHeader ) {
                // we've found the data is in wrong format.
                HTTPLogError(@"MultipartFormDataParser: MultipartFormDataParser: wrong input format, coulnd't get a valid header");
                return NO;
            }
            if( [delegate respondsToSelector:@selector(processStartOfPartWithHeader:)] ) {
                [delegate processStartOfPartWithHeader:currentHeader];
            }

            HTTPLogVerbose(@"MultipartFormDataParser: MultipartFormDataParser: Retrieved part header.");

            // skip the two trailing \r\n, in addition to the whole header.
            offset = headerEnd + 4; 
        }
        // after we've got the header, we try to
        // find the boundary in the data.
        int contentEnd = [self findContentEnd:workingData fromOffset:offset];
        
        if( contentEnd == -1 ) {

            // this case, we didn't find the boundary, so the data is related to the current part.
            // we leave sizeToLeavePending bytes unprocessed to make sure we don't pass a
            // partially received boundary on as content.
            const char* unhandledBytes = (const char*) workingData.bytes + offset;
            NSUInteger unhandledLength = ( (NSUInteger) offset < workingData.length ) ? workingData.length - (NSUInteger) offset : 0;
            NSUInteger sizeToPass = ( unhandledLength > sizeToLeavePending ) ? unhandledLength - sizeToLeavePending : 0;

            if( sizeToPass > 0 ) {
                // if we parse BASE64 encoded data, or Quoted-Printable data, we make sure we don't break the format
                NSData* candidate = [NSData dataWithBytesNoCopy:(char*) unhandledBytes length:sizeToPass freeWhenDone:NO];
                int leaveTrailing = [self numberOfBytesToLeavePendingWithData:candidate length:sizeToPass encoding:currentEncoding];
                if( leaveTrailing > 0 ) {
                    sizeToPass = ( (NSUInteger) leaveTrailing < sizeToPass ) ? sizeToPass - (NSUInteger) leaveTrailing : 0;
                }
            }
            
            if( sizeToPass == 0 ) {
                // wait for more data, keeping everything that is still unhandled.
                [pendingData setData:[NSData dataWithBytes:unhandledBytes length:unhandledLength]];
                return YES;
            }
            // decode the chunk and let the delegate use it (store in a file, for example)
            NSData* decodedData = [MultipartFormDataParser decodedDataFromData:[NSData dataWithBytesNoCopy:(char*) unhandledBytes length:sizeToPass freeWhenDone:NO] encoding:currentEncoding];
            
            if( [delegate respondsToSelector:@selector(processContent:WithHeader:)] ) {
                HTTPLogVerbose(@"MultipartFormDataParser: Processed %lu bytes of body",(unsigned long) sizeToPass);

                [delegate processContent: decodedData WithHeader:currentHeader];
            }

            // store the unprocessed data till the next chunks come.
            [pendingData setData:[NSData dataWithBytes:unhandledBytes + sizeToPass length:unhandledLength - sizeToPass]];
            return YES;
        }
        else {

            // Here we found the boundary.
            // let the delegate process the rest of the part, and continue going to the next parts.
            if( [delegate respondsToSelector:@selector(processContent:WithHeader:)] ) {
                NSData* lastChunk = [NSData dataWithBytesNoCopy:(char*) workingData.bytes + offset length:contentEnd - offset freeWhenDone:NO];
                // decode it the same way the intermediate chunks are decoded.
                [delegate processContent:[MultipartFormDataParser decodedDataFromData:lastChunk encoding:currentEncoding] WithHeader:currentHeader];
            }

            if( [delegate respondsToSelector:@selector(processEndOfPartWithHeader:)] ){
                [delegate processEndOfPartWithHeader:currentHeader];
                HTTPLogVerbose(@"MultipartFormDataParser: End of body part");
            }
            currentHeader = nil;

            // set up offset to continue with the remaining data (if any)
            offset = contentEnd + (int)boundaryData.length;
            // setting the flag tells the parser to check for the closing "--" and then
            // skip all the data till CRLF
            checkForContentEnd = YES;
        }
    }
}


//-----------------------------------------------------------------
#pragma mark private methods

// Finds the CRLF that ends a boundary line.
//
// offset - position in data where the search starts.
// data   - buffer being parsed.
//
// Returns the offset of the first byte after the CRLF, or -1 when no complete
// CRLF is present at or after offset (including empty or one-byte data).
// The boundary line may carry any amount of whitespace before the CRLF
// (rfc2046); in DEBUG builds other characters are reported with a warning.
- (int) offsetTillNewlineSinceOffset:(int) offset inData:(NSData*) data {
    const char* bytes = (const char*) data.bytes;
    NSUInteger length = data.length;
    if( offset < 0 || length < 2 ) 
        return -1;

    // only positions that still have a following byte can start a CRLF; comparing
    // byte by byte avoids unaligned, endian-dependent 16-bit reads.
    for( NSUInteger position = (NSUInteger) offset; position + 1 < length; position++ ) {
        if( bytes[position] == '\r' && bytes[position + 1] == '\n' ) {
            return (int)(position + 2);
        }
        // in debug, we might also want to know, if the file is somehow misformatted.
#ifdef DEBUG
        if( !isspace((unsigned char) bytes[position]) ) {
            HTTPLogWarn(@"MultipartFormDataParser: Warning, non-whitespace character '%c' between boundary bytes and CRLF in boundary line",bytes[position] );
        }
#endif
    }

    // no endl found within current data
    return -1;
}


// Handles the data that precedes the first part.
//
// Searches data for the first boundary ("--" + boundary, without the leading CRLF
// that every later boundary has). Bytes in front of it are the preamble and are
// forwarded to the delegate's processPreambleData: when implemented.
//
// Returns the offset of the first byte after the boundary and sets
// processedPreamble / waitingForCRLF when the boundary is found. Returns -1 when
// it is not found; in that case the last boundary-length bytes (which may hold
// the beginning of a split boundary) replace pendingData and everything before
// them is reported as preamble.
- (int) processPreamble:(NSData*) data {
    if( boundaryData.length < 2 ) {
        return -1;
    }
    // the first boundary won't have CRLF preceding.
    const char* boundaryBytes = (const char*) boundaryData.bytes + 2;
    NSUInteger boundaryLength = boundaryData.length - 2;
    const char* dataBytes = (const char*) data.bytes;
    NSUInteger dataLength = data.length;
    BOOL notifiesDelegate = [delegate respondsToSelector:@selector(processPreambleData:)];

    // find the boundary without leading CRLF.
    BOOL found = NO;
    NSUInteger boundaryOffset = 0;
    if( dataLength >= boundaryLength ) {
        NSUInteger lastCandidate = dataLength - boundaryLength;
        for( ; boundaryOffset <= lastCandidate; boundaryOffset++ ) {
            if( 0 == memcmp(dataBytes + boundaryOffset, boundaryBytes, boundaryLength) ) {
                found = YES;
                break;
            }
        }
    }

    if( !found ) {
        // the end of preamble wasn't found in this chunk
        NSUInteger sizeToKeep = ( dataLength < boundaryLength ) ? dataLength : boundaryLength;
        NSUInteger sizeToProcess = dataLength - sizeToKeep;
        if( sizeToProcess > 0 && notifiesDelegate ) {
            NSData* preambleData = [NSData dataWithBytesNoCopy:(char*) dataBytes length:sizeToProcess freeWhenDone:NO];
            [delegate processPreambleData:preambleData];
            HTTPLogVerbose(@"MultipartFormDataParser: processed preamble");
        }
        // data may be the old pendingData itself; the bytes are copied before the ivar is replaced.
        pendingData = [NSMutableData dataWithBytes:dataBytes + sizeToProcess length:sizeToKeep];
        return -1;
    }

    if( boundaryOffset > 0 && notifiesDelegate ) {
        NSData* preambleData = [NSData dataWithBytesNoCopy:(char*) dataBytes length:boundaryOffset freeWhenDone:NO];
        [delegate processPreambleData:preambleData];
    }
    processedPreamble = YES;
    // tells to skip CRLF after the boundary.
    waitingForCRLF = YES;
    return (int)(boundaryOffset + boundaryLength);
}



// Finds the end of a part header.
//
// workingData - buffer being parsed.
// offset      - position where the header starts.
//
// Returns the offset of the CRLFCRLF sequence that terminates the header, or -1
// when the sequence is not (completely) contained in workingData yet.
- (int) findHeaderEnd:(NSData*) workingData fromOffset:(int)offset {
    static const char headerTerminator[4] = { '\r', '\n', '\r', '\n' };

    const char* bytes = (const char*) workingData.bytes; 
    NSUInteger inputLength = workingData.length;
    if( offset < 0 || inputLength < sizeof(headerTerminator) ) {
        // wait for more data
        return -1;
    }

    // a candidate position needs all four terminator bytes inside the buffer.
    NSUInteger lastCandidate = inputLength - sizeof(headerTerminator);
    for( NSUInteger position = (NSUInteger) offset; position <= lastCandidate; position++ ) {
        if( 0 == memcmp(bytes + position, headerTerminator, sizeof(headerTerminator)) ) {
            return (int) position;
        }
    }
    return -1;
}


// Finds the boundary that ends the content of the current part.
//
// data   - buffer being parsed.
// offset - position where the search starts.
//
// Returns the offset of the first byte of boundaryData (CRLF + "--" + boundary)
// at or after offset, or -1 when data does not contain a complete boundary there.
- (int) findContentEnd:(NSData*) data fromOffset:(int) offset {
    const char* boundaryBytes = (const char*) boundaryData.bytes;
    const char* dataBytes = (const char*) data.bytes;
    NSUInteger boundaryLength = boundaryData.length;
    NSUInteger dataLength = data.length;

    // guard the unsigned subtraction below: shorter data cannot contain the boundary.
    if( offset < 0 || boundaryLength == 0 || dataLength < boundaryLength ) {
        return -1;
    }
    
    NSUInteger lastCandidate = dataLength - boundaryLength;
    for( NSUInteger position = (NSUInteger) offset; position <= lastCandidate; position++ ) {
        if( 0 == memcmp(dataBytes + position, boundaryBytes, boundaryLength) ) {
            return (int) position;
        }
    }
    return -1;
}


// Tells how many trailing bytes of an encoded chunk must be held back so that
// decoding the rest does not split an encoded unit.
//
// data     - buffer holding the chunk; the chunk starts at the first byte of data.
// length   - number of bytes of data that are about to be passed on (clamped to data.length).
// encoding - content-transfer-encoding of the current part.
//
// base64:           the bytes after the last CRLF must form whole 4-character
//                   groups; the remainder (and a trailing lone CR, which may be
//                   the first half of a CRLF) is left pending.
// quoted-printable: a trailing "=" or "=X" is left pending; chunks of 2 bytes or
//                   fewer are left pending entirely.
// anything else:    0.
- (int) numberOfBytesToLeavePendingWithData:(NSData*) data length:(NSUInteger) length encoding:(int) encoding {
    // never look past the end of the buffer we were given.
    if( length > data.length ) {
        length = data.length;
    }
    
    if( encoding == contentTransferEncoding_base64 ) {  
        const char* bytes = (const char*) data.bytes;

        // find the start of the last (possibly unfinished) line.
        NSUInteger lineStart = 0;
        for( NSUInteger i = length; i >= 2; i-- ) {
            if( bytes[i - 2] == '\r' && bytes[i - 1] == '\n' ) {
                lineStart = i;
                break;
            }
        }
        // now we've got to be sure that the length of passed data since last line
        // is a multiple of 4.
        NSUInteger tailLength = length - lineStart;
        NSUInteger trailingCR = ( tailLength > 0 && bytes[length - 1] == '\r' ) ? 1 : 0;
        return (int)( (tailLength - trailingCR) % 4 + trailingCR );
    }
    
    if( encoding == contentTransferEncoding_quotedPrintable ) {
        // we don't pass less than 3 bytes anyway.
        if( length <= 2 ) 
            return (int) length;
        // check the last bytes to be start of encoded symbol.
        const char* bytes = (const char*) data.bytes + length - 2;
        if( bytes[0] == '=' )
            return 2;
        if( bytes[1] == '=' )
            return 1;
        return 0;
    }
    return 0;
}


//-----------------------------------------------------------------
#pragma mark decoding


// Decodes a chunk of part content according to its content-transfer-encoding.
//
// data     - the encoded bytes.
// encoding - one of the contentTransferEncoding_* values.
//
// base64 data goes through -base64Decoded, quoted-printable data through
// +decodedDataFromQuotedPrintableData:, and any other encoding is returned
// unchanged (the very same object).
+ (NSData*) decodedDataFromData:(NSData*) data encoding:(int) encoding {
    if( encoding == contentTransferEncoding_base64 ) {
        return [data base64Decoded];
    }
    if( encoding == contentTransferEncoding_quotedPrintable ) {
        return [self decodedDataFromQuotedPrintableData:data];
    }
    return data;
}


// Value of a hexadecimal digit (upper or lower case), or -1 for any other byte.
static int MultipartFormDataParserHexDigitValue(unsigned char digit) {
    if( digit >= '0' && digit <= '9' ) return digit - '0';
    if( digit >= 'A' && digit <= 'F' ) return digit - 'A' + 10;
    if( digit >= 'a' && digit <= 'f' ) return digit - 'a' + 10;
    return -1;
}

// Decodes quoted-printable data.
//  http://tools.ietf.org/html/rfc2045#section-6.7
//
// "=XX" (two hex digits) is replaced by the byte it encodes, "=" followed by CRLF
// (a soft line break) is removed, and every other byte is copied unchanged.
// A "=" that is followed by neither is malformed; it is copied literally, and a
// warning is logged when it is cut off by the end of the data.
//
// Returns a new data object with the decoded bytes (empty for empty input).
+ (NSData*) decodedDataFromQuotedPrintableData:(NSData *)data {
    const unsigned char* bytes = (const unsigned char*) data.bytes;
    NSUInteger length = data.length;
    // capacity, not length: the result must start out empty.
    NSMutableData* result = [[NSMutableData alloc] initWithCapacity:length];

    NSUInteger literalStart = 0;    // first byte of the literal run not copied to result yet
    NSUInteger position = 0;
    while( position < length ) {
        if( bytes[position] != '=' ) {
#ifdef DEBUG
            if( bytes[position] > 126 ) {
                HTTPLogWarn(@"MultipartFormDataParser: Warning, character with code above 126 appears in quoted printable encoded data");
            }
#endif
            position++;
            continue;
        }

        // flush the literal bytes in front of the '='
        if( position > literalStart ) {
            [result appendBytes:bytes + literalStart length:position - literalStart];
        }
        NSUInteger available = length - position - 1;   // bytes following the '='

        // soft newline
        if( available >= 2 && bytes[position + 1] == '\r' && bytes[position + 2] == '\n' ) {
            position += 3;
            literalStart = position;
            continue;
        }

        int high = -1;
        int low = -1;
        if( available >= 2 ) {
            high = MultipartFormDataParserHexDigitValue(bytes[position + 1]);
            low = MultipartFormDataParserHexDigitValue(bytes[position + 2]);
        }
        if( high >= 0 && low >= 0 ) {
            unsigned char decodedByte = (unsigned char)((high << 4) | low);
            [result appendBytes:&decodedByte length:1];
            position += 3;
            literalStart = position;
            continue;
        }

        if( available < 2 ) {
            HTTPLogWarn(@"MultipartFormDataParser: warning, trailing '=' in quoted printable data");
        }
        // malformed escape: keep the '=' as a literal byte and go on after it.
        literalStart = position;
        position++;
    }

    // flush the literal bytes after the last escape
    if( length > literalStart ) {
        [result appendBytes:bytes + literalStart length:length - literalStart];
    }
    return result;
}


@end
