Baffled by Binary event.body in Lambda Function

Site: https://gallant-stonebraker-875750.netlify.app/

I am completely baffled as to why a Binary PUT is working fine locally, but is not working once deployed. I am trying to take a PUT to my lambda and turn around and upload it to Amazon S3. In both local and production, the file gets to S3. However, in production, the format is invalid. When posting the file to Amazon S3, I am using this as my PUT:

data: parseBody( event.body, event.isBase64Encoded )

Where the parseBody() method is as such:

// I normalize the incoming request body into a Buffer.
// --
// body: a Buffer (returned as-is), a String (decoded), or null / undefined.
// isBase64Encoded: when truthy, a String body is decoded as base64; otherwise it is
// encoded using the Buffer.from() default (utf8).
// --
// Returns a Buffer. A missing body yields an empty Buffer instead of a TypeError.
// --
// CAUTION: If binary data was already turned into a non-base64 String before it got
// here, re-encoding that String will NOT reproduce the original bytes.
function parseBody( body, isBase64Encoded ) {

	// A request without a payload may arrive with no body at all; Buffer.from() would
	// throw on null / undefined, so treat that as zero bytes.
	if ( body == null ) {

		return( Buffer.alloc( 0 ) );

	}

	if ( Buffer.isBuffer( body ) ) {

		return( body );

	}

	if ( isBase64Encoded ) {

		return( Buffer.from( body, "base64" ) );

	}

	return( Buffer.from( body ) );

}

Locally, using netlify-lambda serve, this works just fine. I can see that event.body is of type Buffer in the local environment. However, once it goes to production, I can see that it is a String, that is not base64-encoded. As such, it is using that Buffer.from(body) line.

When I do this locally, the file that lands on S3 is 81 bytes. When I do it in production, the same file lands on S3 at 113 bytes (and is corrupted).

When I try to print the Binary that is being PUT to S3, locally it is:

'ļæ½PNG\r' +
  '\n\u001a\n\u0000\u0000\u0000\r' +
  'IHDR\u0000\u0000\u0000\n\u0000\u0000\u0000\n' +
  '\b\u0006\u0000\u0000\u0000ļæ½2Ļ½\u0000\u0000\u0000\u0018IDAT(Scļæ½ļæ½ļæ½ļæ½\bļæ½8ļæ½\u0010_(Q?x\u0000eļæ½\u0013ļæ½ļæ½U\u0006ļæ½\u0000\u0000\u0000\u0000IENDļæ½B`ļæ½'

And, in production, it is:

'ļæ½PNG\r' +
  '\n\u001a\n\u0000\u0000\u0000\r' +
  'IHDR\u0000\u0000\u0000\n\u0000\u0000\u0000\n' +
  '\b\u0006\u0000\u0000\u0000ļæ½2Ļ½\u0000\u0000\u0000\u0018IDAT(Scļæ½ļæ½ļæ½ļæ½ļæ½ļæ½\bļæ½8ļæ½\u0010_(Q?x\u0000eļæ½\u0013ļæ½ļæ½U\u0006ļæ½\u0000\u0000\u0000\u0000IENDļæ½B`ļæ½'

As you can see, these stringified binaries are almost the same, but not quite (Scļæ½ļæ½ļæ½ļæ½ vs Scļæ½ļæ½ļæ½ļæ½ļæ½ļæ½).

I cannot figure out what is going wrong. I’ve been at this for 2 days, poring through Google, poring through the Support Forums – nothing makes sense.

For the sake of transparency, here’s my entire function at the moment:

// Using the dotenv package allows us to have local-versions of our ENV variables in a
// .env file while still using different build-time ENV variables in production.
require( "dotenv" ).config();

// ----------------------------------------------------------------------------------- //
// ----------------------------------------------------------------------------------- //

// Require core node modules.
var axios = require( "axios" ).default;
var Buffer = require( "buffer" ).Buffer;

// ----------------------------------------------------------------------------------- //
// ----------------------------------------------------------------------------------- //

// I am the Netlify Function handler. I forward the incoming request body as a PUT to
// the remote URL supplied (hex-encoded) in the "remoteUrl" query-string parameter and
// echo the outcome back to the caller as JSON.
// --
// event: the request event. I read httpMethod, headers, body, isBase64Encoded, and
// queryStringParameters ( remoteUrl, name ).
// context: not used.
// --
// Returns a { statusCode, headers, body } response:
// * 200 for a CORS preflight ( OPTIONS ) or a successful PUT.
// * 500 when anything fails - the PUT itself, decoding the URL, or parsing the body.
export async function handler( event, context ) {

	console.log( "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *" );
	console.log( "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *" );
	console.log( event );
	console.log( "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *" );
	console.log( "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *" );

	var corsHeaders = {
		"Access-Control-Allow-Origin" : process.env.NETLIFY_ACCESS_CONTROL_ALLOW_ORIGIN,
		"Access-Control-Allow-Headers": "*",
		"Access-Control-Allow-Methods": "*"
	};

	// In the case of a CORS preflight check, just return early.
	if ( event.httpMethod === "OPTIONS" ) {

		return({
			statusCode: 200,
			headers: corsHeaders,
			body: JSON.stringify( corsHeaders )
		});

	}

	var putHeaders = {
		"User-Agent": ( event.headers[ "user-agent" ] || "netlify-functions" ),
		"Content-Type": "application/octet-stream"
		// "Content-Length": 81 // event.headers[ "content-length" ]
	};

	try {

		// The URL is built inside the try-block so that a missing or malformed
		// "remoteUrl" parameter produces the structured 500 response below rather
		// than an unhandled exception.
		var putUrl = Buffer.from( event.queryStringParameters.remoteUrl, "hex" ).toString( "utf8" );

		// NOTE: Appending with "&" assumes the decoded URL already has a query-string.
		putUrl += "&name=" + encodeURIComponent( event.queryStringParameters.name );

		var putResponse = await axios({
			method: "put",
			url: putUrl,
			headers: putHeaders,
			data: parseBody( event.body, event.isBase64Encoded ),
			responseType: "text"
		});

		// NOTE: Despite the name, the data is still included while the delete() below
		// remains commented-out.
		var configWithoutData = { ...putResponse.config };
		// delete( configWithoutData.data );

		return({
			statusCode: 200,
			headers: corsHeaders,
			body: JSON.stringify({
				message: "Woot!",
				putHeaders: putHeaders,
				successData: putResponse.data,
				successStatus: putResponse.status,
				successStatusText: putResponse.statusText,
				successHeaders: putResponse.headers,
				successConfig: configWithoutData,
				eventBody: event.body,
				event: event,
				bodyType: typeof( event.body )
			})
		});

	} catch ( error ) {

		// Only errors that come back from the remote server carry a "response". Other
		// failures (no response received, an exception thrown while preparing the
		// request) do not - so we can't blindly dereference it without throwing a
		// second error from inside the catch-block.
		var errorResponse = ( error.response || {} );

		return({
			statusCode: 500,
			headers: corsHeaders,
			body: JSON.stringify({
				errorMessage: error.message,
				errorData: errorResponse.data,
				errorStatus: errorResponse.status,
				errorHeaders: errorResponse.headers
			})
		});

	}

}

// ----------------------------------------------------------------------------------- //
// ----------------------------------------------------------------------------------- //

// I return the request body normalized into a Buffer.
// --
// body: a Buffer (returned as-is), a String (decoded), or null / undefined.
// isBase64Encoded: when truthy, a String body is decoded as base64; otherwise it is
// encoded using the Buffer.from() default (utf8).
// --
// Returns a Buffer. A missing body yields an empty Buffer instead of a TypeError.
// --
// CAUTION: No JSON parsing happens here. Also, if binary data was already turned into
// a non-base64 String before it got here, re-encoding that String will NOT reproduce
// the original bytes.
function parseBody( body, isBase64Encoded ) {

	// A request without a payload may arrive with no body at all; Buffer.from() would
	// throw on null / undefined, so treat that as zero bytes.
	if ( body == null ) {

		return( Buffer.alloc( 0 ) );

	}

	if ( Buffer.isBuffer( body ) ) {

		return( body );

	}

	if ( isBase64Encoded ) {

		return( Buffer.from( body, "base64" ) );

	}

	return( Buffer.from( body ) );

}

Hey @bennadel,
We’re looking into this! One thing that would be helpful: can you check in your browser dev tools to see what content-type header is set when you call the Netlify function (we’re assuming it’s being called from a frontend web app somewhere since it doesn’t appear to be called from the website where the function is hosted)?

The reason I ask is because we currently treat everything as binary that has a content-type not beginning with text or application.

Yes, the browser is posting with:

content-type: application/octet-stream

That said, I can also replicate the issue with Postman. If I save the Postman request as a curl command, this is what it gives me:

curl --location --request PUT 'https://gallant-stonebraker-875750.netlify.app/.netlify/functions/upload-to-s3?remoteUrl=xxxxxxxxxxxxxxxx&name=cyna.png' \
--header 'Origin: https://xxxxxxxxxxxxxxxx.com' \
--header 'Content-Type: application/octet-stream' \
--data-binary '@/Users/bennadel/Downloads/cyna.png'

Now, the cyna.png image that I’m uploading is a 1x1 pixel PNG that is 70 bytes. And the file that makes it to S3 is 84 bytes. If I download the S3 image and do a text-diff on them, I get this (the S3-version via Netlify is on the left):

As you can see, they are really close, but not quite. It’s like something is getting messed-up in the binary encoding.

And, to be clear, it’s not just this image – any image, PNG or JPEG, does the same thing. It’s slightly off and corrupted.

@jen OK, I tried to isolate the issue, remove all the other cruft from the demo, and I can easily reproduce it now. Basically, all I have to do is compare the content-length to the event.body length (once normalized as a Buffer). Here’s my demo function:

var Buffer = require( "buffer" ).Buffer;

// ----------------------------------------------------------------------------------- //
// ----------------------------------------------------------------------------------- //

// I echo back the declared content-length alongside the byte-length of the body as it
// was actually delivered to the function, so the two can be compared.
export async function handler( event, context ) {

	// Get the payload as a Buffer, decoding String bodies per the base64 flag.
	var bodyBuffer;

	if ( event.body instanceof Buffer ) {

		bodyBuffer = event.body;

	} else if ( event.isBase64Encoded ) {

		bodyBuffer = Buffer.from( event.body, "base64" );

	} else {

		bodyBuffer = Buffer.from( event.body, "utf8" );

	}

	// Echo everything on the event EXCEPT the (potentially large) body.
	var { body, ...eventCopy } = event;

	var declaredLength = +event.headers[ "content-length" ];

	var payload = {
		contentLength: ( declaredLength || "not-provided" ),
		bodyLength: bodyBuffer.length,
		eventCopy: eventCopy
	};

	return({
		statusCode: 200,
		body: JSON.stringify( payload )
	});

}

As you can see, all I’m doing here is:

  1. Coercing the event.body to be a Buffer based on the encoding.
  2. Echoing the content-length header (sent by the client) and the .length of the Buffer.

If I run this locally using lambda-serve, I get the following output:

{
    "contentLength": 70,
    "bodyLength": 70,
    "eventCopy": {
        "path": "/inspect-body",
        "httpMethod": "PUT",
        "queryStringParameters": {},
        "headers": {
            "content-type": "application/octet-stream",
            "user-agent": "PostmanRuntime/7.25.0",
            "accept": "*/*",
            "cache-control": "no-cache",
            "postman-token": "cfa903c7-fdab-404d-9b3b-843faf7f1baa",
            "host": "localhost:9000",
            "accept-encoding": "gzip, deflate, br",
            "connection": "keep-alive",
            "content-length": "70"
        },
        "isBase64Encoded": false
    }
}

Locally with lambda-serve it works fine – both the content-length and the body .length are 70.

Now, if I run the same request against production, I get the following:

{
    "contentLength": 70,
    "bodyLength": 84,
    "eventCopy": {
        "path": "/.netlify/functions/inspect-body",
        "httpMethod": "PUT",
        "headers": {
            "accept": "*/*",
            "cache-control": "no-cache",
            "client-ip": "xxxxxxxxxx",
            "connection": "keep-alive",
            "content-length": "70",
            "content-type": "application/octet-stream",
            "origin": "xxxxxxxxxx",
            "postman-token": "bb1d1a1c-3f95-4cb7-8f54-1bd270dfabe9",
            "user-agent": "PostmanRuntime/7.25.0",
            "via": "https/1.1 Netlify[b3024e0e-7358-451a-a734-1a757aafbf44] (ApacheTrafficServer/7.1.11)",
            "x-bb-ab": "0.285616",
            "x-bb-client-request-uuid": "b3024e0e-7358-451a-a734-1a757aafbf44-3451633",
            "x-bb-ip": "xxxxxxxxxx",
            "x-bb-loop": "1",
            "x-cdn-domain": "www.bitballoon.com",
            "x-country": "US",
            "x-datadog-parent-id": "1434013172083956058",
            "x-datadog-sampling-priority": "0",
            "x-datadog-trace-id": "5612490752269226137",
            "x-forwarded-for": "xxxxxxxxxx",
            "x-forwarded-proto": "https",
            "x-nf-client-connection-ip": "xxxxxxxxxx"
        },
        "multiValueHeaders": {
            "Accept": [
                "*/*"
            ],
            "Cache-Control": [
                "no-cache"
            ],
            "Client-Ip": [
                "xxxxxxxxxx"
            ],
            "Connection": [
                "keep-alive"
            ],
            "Content-Length": [
                "70"
            ],
            "Content-Type": [
                "application/octet-stream"
            ],
            "Origin": [
                "xxxxxxxxxx"
            ],
            "Postman-Token": [
                "bb1d1a1c-3f95-4cb7-8f54-1bd270dfabe9"
            ],
            "User-Agent": [
                "PostmanRuntime/7.25.0"
            ],
            "Via": [
                "https/1.1 Netlify[b3024e0e-7358-451a-a734-1a757aafbf44] (ApacheTrafficServer/7.1.11)"
            ],
            "X-Bb-Ab": [
                "0.285616"
            ],
            "X-Bb-Client-Request-Uuid": [
                "b3024e0e-7358-451a-a734-1a757aafbf44-3451633"
            ],
            "X-Bb-Ip": [
                "xxxxxxxxxx"
            ],
            "X-Bb-Loop": [
                "1"
            ],
            "X-Cdn-Domain": [
                "www.bitballoon.com"
            ],
            "X-Country": [
                "US"
            ],
            "X-Datadog-Parent-Id": [
                "1434013172083956058"
            ],
            "X-Datadog-Sampling-Priority": [
                "0"
            ],
            "X-Datadog-Trace-Id": [
                "5612490752269226137"
            ],
            "X-Forwarded-For": [
                "xxxxxxxxxx"
            ],
            "X-Forwarded-Proto": [
                "https"
            ],
            "X-Nf-Client-Connection-Ip": [
                "xxxxxxxxxx"
            ]
        },
        "queryStringParameters": {},
        "multiValueQueryStringParameters": {},
        "isBase64Encoded": false
    }
}

This time, notice that the content-length is 70, but the .length on the body is 84.

Hopefully this isolated example helps!

1 Like

And, here’s the curl version of my PUT:

curl --location --request PUT 'https://gallant-stonebraker-875750.netlify.app/.netlify/functions/inspect-body' \
--header 'Origin: https://xxxxxxxxxx.com' \
--header 'Content-Type: application/octet-stream' \
--data-binary '@/Users/bennadel/Downloads/cyna.png'
1 Like

This is incredibly helpful – we really super appreciate it. I’ve reproduced your issue and set up a copy of your function for our engineers to use for testing. At this point, it seems like a bug on our end with how we interpret content-type: application/* headers and decide based on that whether or not to base64 encode the binary data.

In my testing, when I POST a png and change the content-type header from application/octet-stream to image/png I seem to get the encoding Iā€™d expect. Is that something you could try?

1 Like

I can confirm that if I switch from the generic binary-type to an image/* type, then I am getting the Base64-encoding.

When I have a chance later tonight, I’ll check to see if trying the same with an Image results in the proper image making its way to S3.

@jen Yes, I can confirm (at least via Postman) that if I switch from application/octet-stream to image/png, the file that gets proxied-up to S3 is valid. It looks like it may be one or two bytes off; but, that difference may just be some sort of ??padding??:

The one on the right is the original. Not sure what that ? represents. But, like I said, the one that ends up on S3 is still valid despite the ? thinger. When I download it, it can be opened and viewed like an image.

So, yep, it looks like it’s something with the headers. Unfortunately, in my particular context, I don’t have much control over which headers are being sent. But, at least we’re on the right track!

1 Like

We got the bug filed internally and it seems well-scoped to me. I can’t say if/when it will be fixed, though, unfortunately :frowning: I was hoping it might be possible to use our custom headers to “overwrite” the content-type header, but no dice.

I will definitely update here if there are updates and, again, really appreciate your debugging work!

@jen good stuff.

For images, using the more specific mime-type sounds like a reasonable work-around. That said, I think there are some binary mime-types that do legitimately have application/* values (like application/pdf and application/zip). It sounds like, at this time, there would be no real work-around for those.

Cheers!

Yes, agreed re: those mime-types! Handling those correctly will be part of the bugfix, I would assume.

And you’re correct that there’s no real workaround for those at this point since our proxy currently assumes all application/* content-type headers contain payloads that do not need to be base64 encoded. Although one last terrible idea you could try would be POSTing a zip or pdf with an image/png header so that it would be encoded on the way into the function, and then maybe when you pass it on to AWS, you could update the header to be the correct one.

1 Like

Ah, totally legit idea :+1:

1 Like

Thanks Ben and Jen for this thread, I was in a very similar situation and was finally able to fix it in production. (using image/* content-types as suggested)

However, I still can’t get it to work locally. I’m using netlify dev, not netlify-lambda, for local development and, looking at the netlify-cli source code, there seems to be no way of forcing base64 encoding; it always assumes utf8, and so my images get corrupted.
Could this be looked at as well, on the netlify-cli side of things?

Thanks!

Edit: created a PR for that change: Use base64 encoding for binary types by ruiramos · Pull Request #975 · netlify/cli · GitHub

2 Likes

That’s amazing, thanks so much for the PR!

@jen

I’m finally getting around to testing the breaking changes you made in https://answers.netlify.com/t/changed-behavior-in-function-body-encoding/18991/1 (that said, this URL no longer seems to be available for some reason). But, I’m still having trouble getting a binary upload to work with application/octet-stream. I’ve updated my test function to use the following code:

var Buffer = require( "buffer" ).Buffer;

// I report how the request body was delivered to the function: its declared
// content-type and content-length, its actual byte-length, and which representation
// ( Buffer / String / base64 ) it arrived in.
export async function handler( event, context ) {

	var incoming = event.body;
	var arrivedAsBuffer = ( incoming instanceof Buffer );

	// Strings are decoded according to the base64 flag; Buffers are used untouched.
	var bodyBuffer = incoming;

	if ( ! arrivedAsBuffer ) {

		var encoding = ( event.isBase64Encoded ? "base64" : "utf8" );

		bodyBuffer = Buffer.from( incoming, encoding );

	}

	// Shallow-clone the event, then drop the body so the echo stays small.
	var eventCopy = Object.assign( {}, event );
	delete( eventCopy.body );

	var report = {
		contentType: event.headers[ "content-type" ],
		contentLength: ( +event.headers[ "content-length" ] || "not-provided" ),
		bodyLength: bodyBuffer.length,
		bodyIsBuffer: arrivedAsBuffer,
		bodyIsString: ( typeof( incoming ) === "string" ),
		bodyIsBase64: event.isBase64Encoded,
		eventCopy: eventCopy
	};

	return({
		statusCode: 200,
		body: JSON.stringify( report )
	});

}

Now, if I try to upload a file to the given function, I get the following response:

{
    "contentType": "application/octet-stream",
    "contentLength": 145652,
    "bodyLength": 275070,
    "bodyIsBuffer": false,
    "bodyIsString": true,
    "bodyIsBase64": false,
    .... truncated ....
}

For some reason, the bodyLength is showing about 2x the size of the content-length header.

Also, notice that it is not being Base64 encoded. However, going back to your “breaking changes” announcement from the other week:

If the content-type is anything else, we will base64 encode the body. This will ensure that binary media types like application/octet-stream and application/pdf arrive at their final destinations intact.

From that, I would assume that the body should be Base64-encoded.

I am not sure what I am doing wrong?

hey @bennadel, just a quick note to let you know Jen is out on PTO this week.

I will make sure it gets picked up again when she’s back. Thanks for your patience, we are actively looking at this.

1 Like

@bennadel, that url was set to private since the new functionality hasn’t been shipped yet. That’s probably why you are still having trouble. The url should become available again when the change has been released.

Ah, good to know - thank you for the clarity. I had thought the feature had already been deployed when that link went around. I’ll stand by for future updates :+1: