Skip to content

blob: slow and degraded upload performance #3596

@vabatta

Description

@vabatta

Describe the bug

Upload performance through the blob drivers (only mem, file, and s3 were tested) is much slower than using the native or an alternative package directly.

To Reproduce

Upload a large file (~4.5 GB) via curl:

curl -X POST -F "file=@large.bin" http://localhost:8080/upload

Consider this handler, which saves the upload to a local file:

package main

import (
	"fmt"
	"io"
	"net/http"
	"os"
	"path/filepath"
)

// uploadHandler accepts a multipart POST and saves the part named "file" into
// the current working directory under the uploaded file's name.
//
// Responses:
//   - 405 if the request method is not POST.
//   - 400 if the multipart form cannot be parsed or has no "file" part.
//   - 500 if the destination file cannot be created, written, or closed.
//   - 200 with a short confirmation line on success.
func uploadHandler(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		http.Error(w, "Only POST allowed", http.StatusMethodNotAllowed)
		return
	}

	// Parse multipart form with 100 MB max memory (rest stored on disk).
	if err := r.ParseMultipartForm(100 << 20); err != nil {
		http.Error(w, "Failed to parse multipart form: "+err.Error(), http.StatusBadRequest)
		return
	}

	file, header, err := r.FormFile("file")
	if err != nil {
		http.Error(w, "Failed to get file from form: "+err.Error(), http.StatusBadRequest)
		return
	}
	defer file.Close()

	// Create destination file (save in current dir).
	dstPath := filepath.Join(".", header.Filename)
	dst, err := os.Create(dstPath)
	if err != nil {
		http.Error(w, "Failed to create file: "+err.Error(), http.StatusInternalServerError)
		return
	}

	// Copy uploaded file to disk.
	written, err := io.Copy(dst, file)
	if err != nil {
		// Best-effort close: the copy error is the one worth reporting.
		dst.Close()
		http.Error(w, "Failed to save file: "+err.Error(), http.StatusInternalServerError)
		return
	}

	// Close explicitly rather than via defer so a failure to finalize the file
	// is reported to the client instead of being silently dropped.
	if err := dst.Close(); err != nil {
		http.Error(w, "Failed to save file: "+err.Error(), http.StatusInternalServerError)
		return
	}

	w.WriteHeader(http.StatusOK)
	fmt.Fprintf(w, "Uploaded %s (%d bytes) successfully\n", header.Filename, written)
}

// main registers uploadHandler at /upload on the default mux and serves on
// :8080, panicking if the server stops with an error.
func main() {
	http.HandleFunc("/upload", uploadHandler)
	fmt.Println("Listening on :8080...")

	if serveErr := http.ListenAndServe(":8080", nil); serveErr != nil {
		panic(serveErr)
	}
}

On my machine (MacBook Pro M1 Max) it takes about 8 seconds end to end.

The equivalent using blob

package main

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"path/filepath"

	"gocloud.dev/blob"
	_ "gocloud.dev/blob/fileblob"
)

// uploadHandler returns a handler that accepts a multipart POST and writes the
// part named "file" to bucket, using the base name of the uploaded file as the
// blob key.
//
// Responses:
//   - 405 if the request method is not POST.
//   - 400 if the multipart form cannot be parsed or has no "file" part.
//   - 500 if the blob writer cannot be created, written, or closed.
//   - 200 with a short confirmation line on success.
func uploadHandler(bucket *blob.Bucket) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			http.Error(w, "Only POST allowed", http.StatusMethodNotAllowed)
			return
		}

		// Same 100 MB in-memory limit as the plain-file version of this handler.
		if err := r.ParseMultipartForm(100 << 20); err != nil {
			http.Error(w, "Failed to parse multipart form: "+err.Error(), http.StatusBadRequest)
			return
		}

		file, header, err := r.FormFile("file")
		if err != nil {
			http.Error(w, "Failed to get file from form: "+err.Error(), http.StatusBadRequest)
			return
		}
		defer file.Close()

		// Use a cancelable context for the write: per the gocloud.dev/blob
		// documentation, canceling the context before Close aborts the write,
		// whereas Close on a live context finalizes whatever was written.
		ctx, cancel := context.WithCancel(r.Context())
		defer cancel()

		// Use filename as blob key.
		key := filepath.Base(header.Filename)

		writer, err := bucket.NewWriter(ctx, key, nil)
		if err != nil {
			http.Error(w, "Failed to create blob writer: "+err.Error(), http.StatusInternalServerError)
			return
		}

		written, err := io.Copy(writer, file)
		if err != nil {
			// Abort first so a truncated object is not committed, then close to
			// release the writer's resources; the copy error is the one reported.
			cancel()
			writer.Close()
			http.Error(w, "Failed to write to blob: "+err.Error(), http.StatusInternalServerError)
			return
		}

		if err := writer.Close(); err != nil {
			http.Error(w, "Failed to close blob writer: "+err.Error(), http.StatusInternalServerError)
			return
		}

		w.WriteHeader(http.StatusOK)
		fmt.Fprintf(w, "Uploaded %s (%d bytes) to blob successfully\n", key, written)
	}
}

// main opens the local directory /tmp/mybucket as a blob bucket, registers the
// upload handler at /upload, and serves on :8080. It panics if the bucket
// cannot be opened or the server stops with an error.
func main() {
	bucket, openErr := blob.OpenBucket(context.Background(), "file:///tmp/mybucket")
	if openErr != nil {
		panic(fmt.Errorf("failed to open fileblob bucket: %w", openErr))
	}
	defer bucket.Close()

	http.HandleFunc("/upload", uploadHandler(bucket))
	fmt.Println("Listening on :8080...")

	if serveErr := http.ListenAndServe(":8080", nil); serveErr != nil {
		panic(serveErr)
	}
}

On my machine (MacBook Pro M1 Max) it takes about 28 seconds end to end, roughly 3.5x slower.

The same slowdown is noted with mem and s3.

Expected behavior

The abstraction should cost anywhere from a negligible penalty to, at most, a 1.5x slowdown.

Version

gocloud.dev v0.42.0

Additional context

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions