From d863cee82aad34900144198abf740bb7f75a4642 Mon Sep 17 00:00:00 2001 From: Sergey Matveev Date: Mon, 5 Sep 2022 16:50:20 +0300 Subject: [PATCH] More secure and faster version * No length before each block * Explicitly authenticated last block * Explicit magic prepended * Simplified code --- FORMAT | 17 +++ README | 63 +++-------- go.mod | 8 +- go.sum | 12 +- main.go | 317 +++++++++++++++++++++++++++++++--------------------- poly1305.go | 22 ++++ 6 files changed, 253 insertions(+), 186 deletions(-) create mode 100644 FORMAT create mode 100644 poly1305.go diff --git a/FORMAT b/FORMAT new file mode 100644 index 0000000..27244f3 --- /dev/null +++ b/FORMAT @@ -0,0 +1,17 @@ +Output format is very simple: + +* "GOHPENC\n" magic string +* 32-bit big-endian block length +* 128-bit random salt + + /-------BLOCK---------\ /-------BLOCK---------\ ++-------+----------+------+------------+----------+------------+----------+---- +| MAGIC | BLOCKLEN | SALT | CIPHERTEXT | AUTH TAG | CIPHERTEXT | AUTH TAG | ... ++-------+----------+------+------------+----------+------------+----------+---- + +There is trivial key generation: + keyOrdinary, keyLast = HKDF(SHA512, keySpecified, SALT, AD=MAGIC) + +Each block is encrypted with ChaCha20-Poly1305, using keyOrdinary and +nonce containing 64-bit big-endian counter, starting at zero. Very last +block is encrypted with keyLast and it may have zero length plaintext. diff --git a/README b/README index bacd9b6..46c7543 100644 --- a/README +++ b/README @@ -4,61 +4,34 @@ gohpenc highly resembles hpenc tool (https://github.com/vstakhov/hpenc). 
hpenc solves the problem that there is no simple tool to quickly transfer data with encryption and authentication: -* openssl enc -- uses single CPU, no authentication -* GnuPG -- complex key generation/management, relatively slow -* OpenSSH -- uses single CPU, not very fast +* openssl enc -- non-parallelized, no authentication +* GnuPG -- non-parallelized, complex key generation/management +* age -- non-parallelized +* OpenSSH -- non-parallelized, not very fast Why gohpenc was written? hpenc has some problems: it does not work on aarch64 and sparc64 architectures under FreeBSD (as seen in the port's Makefile) and produces incompatible output (unauthenticated after 8192 blocks) between FreeBSD and HardenedBSD systems somehow. Instead of painful debugging I decided to write something similar on the Go -language, widening supported platforms. +language, widening supported platforms. But with loss of compatibility +with hpenc. -gohpenc is incompatible with hpenc and much simpler: +Also hpenc won't fail if transmission was truncated (in valid block +bounds). -* it uses only XChaCha20-Poly1305 algorithm -* no random data generation mode -* no metadata in output stream and no structure validation. Only blocks - authentication -* no key derivation -- new key for each block +It uses ChaCha20-Poly1305, parallelized AEAD encryption of blocks. -But it still satisfies most of hpenc aims: + $ key=`gohpenc -psk` + $ echo "message to be transmitted" | gohpenc -k $key > encrypted + $ gohpenc -d -k $key < encrypted -* Very simple key management -- single pre-shared key -* Parallelizeable -- each block is encrypted in different thread, so all - your CPUs could be utilized -* Very fast -- ChaCha20-Poly1305 is fast even on relatively low-end - devices like mobile devices. 
Despite gohpenc is written on Go, its - dependent libraries contain assembly-optimized code -* Built-in authentication and integrity check with small data overhead +Blocksize can be specified with -b option (in KiBs). By default it uses +1MiB blocks. By default all CPUs are used, that can be overridden with -c +option. If you have got 8 CPUs, then you require (8+1)*1MiB=9MiB of +memory for buffers allocation. -Usage is very simple: - - $ gohpenc -psk - DTGZI5R2HS4YEDSIO56AFKPONE6KJE3Q2QETODDOH3O6UYFPROHQ - $ echo "message to be transmitted" | gohpenc -k DTGZI5R2HS4YEDSIO56AFKPONE6KJE3Q2QETODDOH3O6UYFPROHQ > encrypted - $ gohpenc -k DTGZI5R2HS4YEDSIO56AFKPONE6KJE3Q2QETODDOH3O6UYFPROHQ -d < encrypted - -How encryption/authentication is performed: - -* First 16 bytes of the stream contain random data -- nonce salt -* XChaCha20-Poly1305 algorithm is initialized with the key and 24-byte - nonce, where 16 bytes is the salt, and 8 bytes is 64-bit unsigned - big-endian block number -* 32-bit big-endian value with the length of the block is outputted, - then an encrypted and authenticated block goes further, with - authenticated data containing that 32-bit length value - - /----------BLOCK-------------\ /----------BLOCK------------\ -+------+-----+------------+----------+-----+------------+----------+---- -| SALT | LEN | CIPHERTEXT | AUTH TAG | LEN | CIPHERTEXT | AUTH TAG | ... -+------+-----+------------+----------+-----+------------+----------+---- - -gohpenc preallocates memory for one block for each thread and one block -for buffered reading from stdin. If you want to process data with 1 MiB -blocks in 4 threads, then you have to have at least 5 MiBs of free -memory. Moreover you have at least 1 MiB of free memory on the -decrypting side. +There is random number generation mode (-r option), that just generates +random key and encrypts dummy data in the buffers. gohpenc is free software: see the file COPYING for copying conditions. 
diff --git a/go.mod b/go.mod index fe390d9..a4d9dc1 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,7 @@ -module go.cypherpunks.ru/gohpenc +module go.cypherpunks.ru/gohpenc/v3 -go 1.12 +go 1.17 -require golang.org/x/crypto v0.0.0-20191227163750-53104e6ec876 +require golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90 + +require golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1 // indirect diff --git a/go.sum b/go.sum index 452e5b0..f461fd9 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,4 @@ -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191227163750-53104e6ec876 h1:sKJQZMuxjOAR/Uo2LBfU90onWEf1dF4C+0hPJCc9Mpc= -golang.org/x/crypto v0.0.0-20191227163750-53104e6ec876/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d h1:+R4KGOnez64A81RvjARKc4UT5/tI9ujCIVX+P5KiHuI= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90 h1:Y/gsMcFOcR+6S6f3YeMKl5g+dZMEWqcz5Czj/GWYbkM= +golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1 h1:SrN+KX8Art/Sf4HNj6Zcz06G7VEz+7w9tdXTPOZ7+l4= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/main.go b/main.go index 43eb680..9eacacb 100644 --- a/main.go +++ b/main.go @@ -20,196 +20,253 @@ package main import ( "bufio" - "crypto/cipher" "crypto/rand" + "crypto/sha512" "encoding/base32" "encoding/binary" "flag" "fmt" "io" + 
"log" "os" "runtime" "sync" + "golang.org/x/crypto/chacha20" "golang.org/x/crypto/chacha20poly1305" + "golang.org/x/crypto/hkdf" "golang.org/x/crypto/poly1305" ) const ( - LenSize = 4 + Magic = "GOHPENC\n" SaltSize = 16 ) -var ( - doPSK = flag.Bool("psk", false, "Generate PSK") - keyB32 = flag.String("k", "", "Encryption key") - decrypt = flag.Bool("d", false, "Decrypt, instead of encrypt") - blockSize = flag.Int("b", 1<<10, "Blocksize, in KiB") - threads = flag.Int("c", runtime.NumCPU(), "Number of threads") - - Base32Codec *base32.Encoding = base32.StdEncoding.WithPadding(base32.NoPadding) - - key []byte - bs int - wg sync.WaitGroup -) - -type Task struct { - ctr uint64 - size int -} +var Base32Codec *base32.Encoding = base32.StdEncoding.WithPadding(base32.NoPadding) type Worker struct { - aead cipher.AEAD - nonce []byte - input []byte - ready chan struct{} - task chan Task - output chan []byte - written chan struct{} + ctr uint64 + buf []byte + readyIn chan struct{} + readyOut chan struct{} + last bool } -func NewWorker(key, salt []byte) *Worker { - aead, err := chacha20poly1305.NewX(key) - if err != nil { - panic(err) - } - w := Worker{ - aead: aead, - nonce: make([]byte, chacha20poly1305.NonceSizeX), - input: make([]byte, LenSize+bs+poly1305.TagSize), - ready: make(chan struct{}), - task: make(chan Task), - output: make(chan []byte), - written: make(chan struct{}), - } - copy(w.nonce, salt) - go w.Run() - return &w -} - -func (w *Worker) Run() { - var output []byte +func readBuf(dst []byte, src io.Reader) ([]byte, error) { + var n, full int var err error - for { - w.ready <- struct{}{} - task := <-w.task - binary.BigEndian.PutUint64(w.nonce[SaltSize:], task.ctr) - if *decrypt { - output, err = w.aead.Open( - w.input[:LenSize], - w.nonce, - w.input[LenSize:LenSize+task.size+poly1305.TagSize], - w.input[:LenSize], - ) - if err != nil { - panic(err) + for full < len(dst) { + n, err = src.Read(dst[full:]) + full += n + if err != nil { + if err == io.EOF { + break 
} - output = output[LenSize:] - } else { - binary.BigEndian.PutUint32(w.input, uint32(task.size)) - output = w.aead.Seal( - w.input[:LenSize], - w.nonce, - w.input[LenSize:LenSize+task.size], - w.input[:LenSize], - ) - } - w.output <- output - <-w.written - wg.Done() + return nil, err + } } + return dst[:full], err +} + +type DummyReader struct{} + +func (r *DummyReader) Read(b []byte) (int, error) { + return len(b), nil } func main() { + doRNG := flag.Bool("r", false, "Random number generator") + doGen := flag.Bool("psk", false, "Generate key") + doDec := flag.Bool("d", false, "Decrypt, instead of encrypt") + bs := flag.Int("b", 1<<10, "Blocksize, KiB") + jobs := flag.Int("c", runtime.NumCPU(), "Number of parallel threads") + keyB32 := flag.String("k", "", "Encryption key") flag.Parse() + log.SetFlags(log.Ldate | log.Lmicroseconds | log.Lshortfile) - if *doPSK { - key := make([]byte, chacha20poly1305.KeySize) + var err error + key := make([]byte, chacha20poly1305.KeySize) + if *doGen || *doRNG { if _, err := io.ReadFull(rand.Reader, key); err != nil { - panic(err) + log.Fatalln(err) + } + if *doGen { + fmt.Println(Base32Codec.EncodeToString(key)) + return + } + } else { + key, err = Base32Codec.DecodeString(*keyB32) + if err != nil { + log.Fatalln(err) + } + if len(key) != chacha20poly1305.KeySize { + log.Fatalln("invalid key size") } - fmt.Println(Base32Codec.EncodeToString(key)) - return } - var err error - key, err = Base32Codec.DecodeString(*keyB32) - if err != nil { - panic(err) - } - if len(key) != chacha20poly1305.KeySize { - panic("Invalid key size") - } salt := make([]byte, SaltSize) - - if *decrypt { + if *doDec { + if _, err = io.ReadFull(os.Stdin, salt[:len(Magic)]); err != nil { + log.Fatalln(err) + } + if string(salt[:len(Magic)]) != Magic { + log.Fatalln("invalid magic") + } + if _, err = io.ReadFull(os.Stdin, salt[:4]); err != nil { + log.Fatalln(err) + } + *bs = int(binary.BigEndian.Uint32(salt[:4])) if _, err = io.ReadFull(os.Stdin, salt); err != 
nil { - panic(err) + log.Fatalln(err) } } else { + if _, err = os.Stdout.WriteString(Magic); err != nil { + log.Fatalln(err) + } + *bs = *bs * 1024 + binary.BigEndian.PutUint32(salt, uint32(*bs)) + if _, err = os.Stdout.Write(salt[:4]); err != nil { + log.Fatalln(err) + } if _, err = io.ReadFull(rand.Reader, salt); err != nil { - panic(err) + log.Fatalln(err) } if _, err = os.Stdout.Write(salt); err != nil { - panic(err) + log.Fatalln(err) } } - bs = *blockSize * (1 << 10) - if bs > 1<<32 { - panic("blocksize exceeds 32-bits") + kdf := hkdf.New(sha512.New, key, salt, []byte(Magic)) + if _, err = io.ReadFull(kdf, key); err != nil { + log.Fatalln(err) } - stdin := bufio.NewReaderSize(os.Stdin, LenSize+bs+poly1305.TagSize) - workers := make([]*Worker, *threads) - for i := 0; i < *threads; i++ { - workers[i] = NewWorker(key, salt) + var wg sync.WaitGroup + var lastMet bool + workers := make([]*Worker, 0, *jobs) + for i := 0; i < *jobs; i++ { + w := Worker{ + buf: make([]byte, *bs+chacha20poly1305.Overhead), + readyIn: make(chan struct{}), + readyOut: make(chan struct{}), + } + go func() { + ciph, err := chacha20poly1305.New(key) + if err != nil { + log.Fatalln(err) + } + nonce := make([]byte, chacha20poly1305.NonceSize) + var ciphertext, tag []byte + var s *chacha20.Cipher + var p *poly1305.MAC + for { + w.readyIn <- struct{}{} + <-w.readyIn + binary.BigEndian.PutUint64(nonce, w.ctr) + if *doDec { + tag = w.buf[len(w.buf)-poly1305.TagSize:] + ciphertext = w.buf[:len(w.buf)-poly1305.TagSize] + s, err = chacha20.NewUnauthenticatedCipher(key, nonce) + if err != nil { + log.Fatalln(err) + } + var polyKey [32]byte + s.XORKeyStream(polyKey[:], polyKey[:]) + s.SetCounter(1) + p = poly1305.New(&polyKey) + writeWithPadding(p, nil) + writeWithPadding(p, ciphertext) + writeUint64(p, 0) + writeUint64(p, len(ciphertext)) + if p.Verify(tag) { + w.buf = ciphertext + s.XORKeyStream(ciphertext, ciphertext) + } else { + lastMet = true + if _, err = io.ReadFull(kdf, key); err != nil { + 
log.Fatalln(err) + } + ciph, err = chacha20poly1305.New(key) + if err != nil { + log.Fatalln(err) + } + w.buf, err = ciph.Open(w.buf[:0], nonce, w.buf, nil) + if err != nil { + log.Fatalln(err) + } + lastMet = true + } + } else { + if w.last { + if _, err = io.ReadFull(kdf, key); err != nil { + log.Fatalln(err) + } + ciph, err = chacha20poly1305.New(key) + if err != nil { + log.Fatalln(err) + } + } + w.buf = ciph.Seal(w.buf[:0], nonce, w.buf, nil) + } + w.readyOut <- struct{}{} + <-w.readyOut + wg.Done() + } + }() + workers = append(workers, &w) } + go func() { - var ctr uint64 + var ctr int64 + var w *Worker + var err error for { - w := workers[ctr%uint64(len(workers))] - if _, err := os.Stdout.Write(<-w.output); err != nil { - panic(err) + w = workers[ctr%int64(len(workers))] + <-w.readyOut + if _, err = os.Stdout.Write(w.buf); err != nil { + log.Fatalln(err) } - w.written <- struct{}{} + w.readyOut <- struct{}{} ctr++ } }() + var stdin io.Reader + if *doRNG { + stdin = &DummyReader{} + } else { + stdin = bufio.NewReaderSize(os.Stdin, *bs+chacha20poly1305.Overhead) + } var ctr uint64 - var size int + var w *Worker for { - w := workers[ctr%uint64(len(workers))] - <-w.ready - if *decrypt { - _, err = io.ReadFull(stdin, w.input[:LenSize]) - if err != nil { - if err == io.EOF { - break - } - panic(err) - } - size = int(binary.BigEndian.Uint32(w.input[:LenSize])) - if _, err = io.ReadFull( - stdin, - w.input[LenSize:LenSize+size+poly1305.TagSize], - ); err != nil { - panic(err) - } + w = workers[ctr%uint64(len(workers))] + <-w.readyIn + if *doDec { + w.buf, err = readBuf(w.buf[:*bs+chacha20poly1305.Overhead], stdin) } else { - size, err = stdin.Read(w.input[LenSize : LenSize+bs]) - if err != nil { - if err == io.EOF { - break - } - panic(err) - } + w.buf, err = readBuf(w.buf[:*bs], stdin) + } + if err != nil && err != io.EOF { + log.Fatalln(err) + } + if *doDec && len(w.buf) < chacha20poly1305.Overhead { + break } + w.ctr = ctr wg.Add(1) - w.task <- Task{ctr, size} + 
if err == io.EOF { + w.last = true + } + w.readyIn <- struct{}{} + if err == io.EOF { + break + } ctr++ } wg.Wait() + if *doDec && !lastMet { + log.Fatalln("did not meet explicit last block") + } } diff --git a/poly1305.go b/poly1305.go new file mode 100644 index 0000000..3efbeb5 --- /dev/null +++ b/poly1305.go @@ -0,0 +1,22 @@ +package main + +import ( + "encoding/binary" + + "golang.org/x/crypto/poly1305" +) + +func writeWithPadding(p *poly1305.MAC, b []byte) { + p.Write(b) + if rem := len(b) % 16; rem != 0 { + var buf [16]byte + padLen := 16 - rem + p.Write(buf[:padLen]) + } +} + +func writeUint64(p *poly1305.MAC, n int) { + var buf [8]byte + binary.LittleEndian.PutUint64(buf[:], uint64(n)) + p.Write(buf[:]) +} -- 2.44.0