go-exif/ifd_builder_encode.go

package exif

import (
	"bytes"
	"fmt"
	"strings"

	"encoding/binary"

	"github.com/dsoprea/go-logging"
)

// IfdTagEntrySize is the encoded size, in bytes, of one IFD tag entry:
// tag-ID (2) + tag-type (2) + unit-count (4) + value/offset (4).
const IfdTagEntrySize = uint32(2 + 2 + 4 + 4)

// ByteWriter writes fixed-size values into a bytes.Buffer using a specific
// byte-order.
type ByteWriter struct {
	// b is the destination buffer.
	b *bytes.Buffer

	// byteOrder is the byte-order used when serializing multi-byte values.
	byteOrder binary.ByteOrder
}

// NewByteWriter returns a ByteWriter that writes to `b` using `byteOrder`.
func NewByteWriter(b *bytes.Buffer, byteOrder binary.ByteOrder) (bw *ByteWriter) {
	bw = new(ByteWriter)
	bw.b = b
	bw.byteOrder = byteOrder

	return bw
}

// writeAsBytes serializes `value` into the underlying buffer with
// binary.Write() using the writer's byte-order. A failure is raised via
// log.PanicIf() and then handed back to the caller as `err` by the recover
// handler.
func (bw ByteWriter) writeAsBytes(value interface{}) (err error) {
	defer func() {
		state := recover()
		if state == nil {
			return
		}

		err = log.Wrap(state.(error))
	}()

	log.PanicIf(binary.Write(bw.b, bw.byteOrder, value))

	return nil
}

// WriteUint32 writes `value` to the buffer as four bytes in the writer's
// byte-order.
func (bw ByteWriter) WriteUint32(value uint32) (err error) {
	defer func() {
		state := recover()
		if state == nil {
			return
		}

		err = log.Wrap(state.(error))
	}()

	log.PanicIf(bw.writeAsBytes(value))

	return nil
}

// WriteUint16 writes `value` to the buffer as two bytes in the writer's
// byte-order.
func (bw ByteWriter) WriteUint16(value uint16) (err error) {
	defer func() {
		state := recover()
		if state == nil {
			return
		}

		err = log.Wrap(state.(error))
	}()

	log.PanicIf(bw.writeAsBytes(value))

	return nil
}

// WriteFourBytes copies `value` to the buffer verbatim (no byte-order
// conversion is applied). It is an error for `value` to be any length other
// than four.
func (bw ByteWriter) WriteFourBytes(value []byte) (err error) {
	defer func() {
		state := recover()
		if state == nil {
			return
		}

		err = log.Wrap(state.(error))
	}()

	if size := len(value); size != 4 {
		log.Panicf("value is not four-bytes: (%d)", size)
	}

	_, writeErr := bw.b.Write(value)
	log.PanicIf(writeErr)

	return nil
}

// ifdDataAllocator accumulates the out-of-table data for an IFD and tracks
// the offset at which the next piece of data will land. It starts from a given
// base offset and bumps that offset each time data is added.
type ifdDataAllocator struct {
	// offset is where the next allocation will be placed.
	offset uint32

	// b collects the allocated data in allocation order.
	b bytes.Buffer
}

// newIfdDataAllocator returns an empty allocator whose first allocation will
// be placed at `ifdDataAddressableOffset`.
func newIfdDataAllocator(ifdDataAddressableOffset uint32) *ifdDataAllocator {
	ida := new(ifdDataAllocator)
	ida.offset = ifdDataAddressableOffset

	return ida
}

// Allocate appends `value` to the data block and returns the offset at which
// it was placed. The running offset is then advanced by the length of `value`.
//
// Like the other error-returning functions in this file, panics raised while
// allocating (including the one bytes.Buffer raises when it cannot grow) are
// recovered and handed back as `err`. An allocation that would push the
// running offset beyond what a 32-bit offset can express is rejected before
// anything is written, so the allocator is left unchanged on failure.
func (ida *ifdDataAllocator) Allocate(value []byte) (offset uint32, err error) {
	defer func() {
		if state := recover(); state != nil {
			offset = 0
			err = log.Wrap(state.(error))
		}
	}()

	// Offsets are written as 32-bit values; a silent wrap-around would make
	// later entries point at the wrong data.
	remaining := ^uint32(0) - ida.offset
	if uint64(len(value)) > uint64(remaining) {
		log.Panicf("allocation of (%d) bytes at offset (0x%08x) overflows the 32-bit offset space", len(value), ida.offset)
	}

	_, err = ida.b.Write(value)
	log.PanicIf(err)

	offset = ida.offset
	ida.offset += uint32(len(value))

	return offset, nil
}

// NextOffset returns the offset at which the next allocation will be placed.
func (ida *ifdDataAllocator) NextOffset() uint32 {
	next := ida.offset

	return next
}

// Bytes returns everything that has been allocated so far, in allocation
// order.
func (ida *ifdDataAllocator) Bytes() []byte {
	allocated := ida.b.Bytes()

	return allocated
}

// IfdByteEncoder turns an IB into raw bytes (for writing), working out the
// allocations and indirection needed for data that does not fit inside the
// table entries.
type IfdByteEncoder struct {
	// journal records the actions taken while encoding. Each event is stored
	// as {direction, where, message}.
	journal [][3]string
}

// NewIfdByteEncoder returns an encoder with an empty journal.
func NewIfdByteEncoder() (ibe *IfdByteEncoder) {
	ibe = new(IfdByteEncoder)
	ibe.journal = [][3]string{}

	return ibe
}

// Journal returns the events recorded so far. Each event is a
// {direction, where, message} triple.
func (ibe *IfdByteEncoder) Journal() [][3]string {
	events := ibe.journal

	return events
}

// TableSize returns the number of bytes occupied by an IFD table holding
// `entryCount` entries: the tag-count field, the entries themselves, and the
// trailing next-IFD offset.
func (ibe *IfdByteEncoder) TableSize(entryCount int) uint32 {
	const (
		tagCountSize      = uint32(2)
		nextIfdOffsetSize = uint32(4)
	)

	entriesSize := IfdTagEntrySize * uint32(entryCount)

	return tagCountSize + entriesSize + nextIfdOffsetSize
}

// pushToJournal appends one event to the journal. `where` names the function
// doing the reporting, `direction` is the operation marker checked by
// PrintJournal() (">", "<" or "-"), and `format`/`args` produce the message.
// Note that events are stored as {direction, where, message}.
func (ibe *IfdByteEncoder) pushToJournal(where, direction, format string, args ...interface{}) {
	message := fmt.Sprintf(format, args...)

	ibe.journal = append(ibe.journal, [3]string{direction, where, message})
}

// PrintJournal prints a hierarchical representation of the steps taken during
// encoding. One line is printed per event: its index, an indent reflecting
// the current nesting level, the direction, the reporting function, and the
// message.
//
// A ">" event opens a level (everything after it is indented further), a "<"
// event closes one, and a "-" event leaves the level unchanged. It panics if
// an event has any other direction or if the opens and closes do not balance.
func (ibe *IfdByteEncoder) PrintJournal() {
	level := 0
	for i, event := range ibe.journal {
		direction, where, message := event[0], event[1], event[2]

		switch direction {
		case ">", "-":
			// Nothing to adjust before printing.
		case "<":
			if level <= 0 {
				log.Panicf("journal operations unbalanced (too many closes)")
			}

			// A close is printed at the same level as its matching open.
			level--
		default:
			log.Panicf("journal operation not valid: [%s]", direction)
		}

		indent := strings.Repeat("  ", level)

		fmt.Printf("%3d %s%s %s: %s\n", i, indent, direction, where, message)

		// An open only affects the events that follow it.
		if direction == ">" {
			level++
		}
	}

	if level != 0 {
		log.Panicf("journal operations unbalanced (too many opens)")
	}
}

// encodeTagToBytes writes the table entry for `bt` through `bw`.
//
// For a tag holding bytes, the value is stored inline in the entry when it is
// four bytes or shorter; otherwise it is handed to `ida` and the entry carries
// the offset that was allocated for it.
//
// For a tag holding a child IB, the behavior depends on
// `nextIfdOffsetToWrite`. If it is more than (0), the child IFD chain is
// encoded as if it will be written at that offset, the encoded block is
// returned as `childIfdBlock`, and the entry points at that offset (the caller
// is responsible for actually placing the block there). If it is (0), nothing
// is descended into and the entry gets a NULL pointer.
func (ibe *IfdByteEncoder) encodeTagToBytes(ib *IfdBuilder, bt *BuilderTag, bw *ByteWriter, ida *ifdDataAllocator, nextIfdOffsetToWrite uint32) (childIfdBlock []byte, err error) {
	defer func() {
		state := recover()
		if state == nil {
			return
		}

		err = log.Wrap(state.(error))
	}()

	// Every entry starts with the tag-ID followed by the tag-type. The
	// tag-type works for both values and child IFDs (which have an official
	// size of LONG).
	log.PanicIf(bw.WriteUint16(bt.tagId))
	log.PanicIf(bw.WriteUint16(bt.typeId))

	if !bt.value.IsBytes() {
		if !bt.value.IsIb() {
			log.Panicf("tag value is not a byte-slice but also not a child IB: %v", bt)
		}

		// Unit-count: one LONG representing one offset.
		log.PanicIf(bw.WriteUint32(1))

		if nextIfdOffsetToWrite == 0 {
			// No child-IFDs are to be allocated. Finish the entry with a
			// NULL pointer.
			ibe.pushToJournal("encodeTagToBytes", "-", "*Not* descending to child: [%s]", bt.value.Ib().ifdPath)

			log.PanicIf(bw.WriteUint32(0))

			return childIfdBlock, nil
		}

		ibe.pushToJournal("encodeTagToBytes", ">", "[%s]->[%s]", ib.ifdPath, bt.value.Ib().ifdPath)

		// Create the block of IFD data and everything it requires.
		var encodeErr error
		childIfdBlock, encodeErr = ibe.encodeAndAttachIfd(bt.value.Ib(), nextIfdOffsetToWrite)
		log.PanicIf(encodeErr)

		ibe.pushToJournal("encodeTagToBytes", "<", "[%s]->[%s]", bt.value.Ib().ifdPath, ib.ifdPath)

		// Point the entry at the next-IFD offset. The IFD itself gets
		// attached by the caller after we return.
		log.PanicIf(bw.WriteUint32(nextIfdOffsetToWrite))

		return childIfdBlock, nil
	}

	// It's a plain value. UNDEFINED values are counted in bytes.
	effectiveType := bt.typeId
	if effectiveType == TypeUndefined {
		effectiveType = TypeByte
	}

	// Work out how many units of the type we are writing and get the raw
	// bytes for the whole list.
	typeSize := uint32(TagTypeSize(effectiveType))
	valueBytes := bt.value.Bytes()
	byteCount := len(valueBytes)
	unitCount := uint32(byteCount) / typeSize

	// Tags listed in tagsWithoutAlignment are exempt from the requirement
	// that the value be a whole number of units.
	if _, exempt := tagsWithoutAlignment[bt.tagId]; !exempt && uint32(byteCount)%typeSize > 0 {
		log.Panicf("tag (0x%04x) value of (%d) bytes not evenly divisible by type-size (%d)", bt.tagId, byteCount, typeSize)
	}

	log.PanicIf(bw.WriteUint32(unitCount))

	// The last four bytes of the entry hold either the value itself
	// (zero-padded) or the offset of the value in the data block.
	if byteCount <= 4 {
		inlineValue := make([]byte, 4)
		copy(inlineValue, valueBytes)

		log.PanicIf(bw.WriteFourBytes(inlineValue))

		return childIfdBlock, nil
	}

	offset, allocErr := ida.Allocate(valueBytes)
	log.PanicIf(allocErr)

	log.PanicIf(bw.WriteUint32(offset))

	return childIfdBlock, nil
}

// encodeIfdToBytes encodes the given IB to a byte-slice. We are given the
// offset at which this IFD's allocated data will start. This method is called
// both to pre-determine how big the table is going to be (so that we can
// calculate the address to allocate data at) as well as to write the final
// table.
//
// It is necessary to fully realize the table in order to predetermine its size
// because it is not enough to know the size of the table: If there are child
// IFDs, we will not be able to allocate them without first knowing how much
// data we need to allocate for the current IFD.
//
// `nextIfdOffsetToWrite` is the offset at which the first child IFD would be
// written; pass (0) for a sizing-only run, in which case child IFDs are not
// descended into. `setNextIb` controls whether the table's next-IFD pointer is
// populated or left as NULL.
//
// The returned `data` is the table, followed by the allocated data, followed
// by any child-IFD blocks. `tableSize` and `dataSize` are the sizes of the
// first two parts and `childIfdSizes` has one size per child-IFD block.
func (ibe *IfdByteEncoder) encodeIfdToBytes(ib *IfdBuilder, ifdAddressableOffset uint32, nextIfdOffsetToWrite uint32, setNextIb bool) (data []byte, tableSize uint32, dataSize uint32, childIfdSizes []uint32, err error) {
	defer func() {
		if state := recover(); state != nil {
			err = log.Wrap(state.(error))
		}
	}()

	ibe.pushToJournal("encodeIfdToBytes", ">", "%s", ib)

	// The tag count is stored in a two-byte field. Refuse to silently
	// truncate it, since the table would then disagree with its own header.
	tagCount := len(ib.tags)
	if tagCount > 0xffff {
		log.Panicf("IFD has (%d) tags, which exceeds what the 16-bit tag-count field can hold", tagCount)
	}

	tableSize = ibe.TableSize(tagCount)

	b := new(bytes.Buffer)
	bw := NewByteWriter(b, ib.byteOrder)

	// Write tag count.
	err = bw.WriteUint16(uint16(tagCount))
	log.PanicIf(err)

	ida := newIfdDataAllocator(ifdAddressableOffset)

	childIfdBlocks := make([][]byte, 0)

	// Write raw bytes for each tag entry. Allocate larger data to be referred
	// to in the follow-up data-block as required. Any "unknown"-byte tags that
	// we can't parse will not be present here (using AddTagsFromExisting(), at
	// least).
	for _, bt := range ib.tags {
		childIfdBlock, err := ibe.encodeTagToBytes(ib, bt, bw, ida, nextIfdOffsetToWrite)
		log.PanicIf(err)

		if childIfdBlock == nil {
			continue
		}

		// We aren't allowed to have non-nil child IFDs if we're just sizing
		// things up.
		if nextIfdOffsetToWrite == 0 {
			log.Panicf("no IFD offset provided for child-IFDs; no new child-IFDs permitted")
		}

		// The next child (or the next IFD in the chain) goes after this one.
		nextIfdOffsetToWrite += uint32(len(childIfdBlock))
		childIfdBlocks = append(childIfdBlocks, childIfdBlock)
	}

	dataBytes := ida.Bytes()
	dataSize = uint32(len(dataBytes))

	childIfdSizes = make([]uint32, len(childIfdBlocks))
	for i, childIfdBlock := range childIfdBlocks {
		childIfdSizes[i] = uint32(len(childIfdBlock))
	}

	// Set the link from this IFD to the next IFD that will be written in the
	// next cycle (or NULL if there is none).
	nextIfdOffset := uint32(0)
	if setNextIb {
		// Write address of next IFD in chain. This will be the original
		// allocation offset plus the size of everything we have allocated for
		// this IFD and its child-IFDs.
		//
		// It is critical that this number is stepped properly. We experienced
		// an issue whereby it first looked like we were duplicating the IFD and
		// then that we were duplicating the tags in the wrong IFD, and then
		// finally we determined that the next-IFD offset for the first IFD was
		// accidentally pointing back to the EXIF IFD, so we were visiting it
		// twice when visiting through the tags after decoding. It was an
		// expensive bug to find.

		ibe.pushToJournal("encodeIfdToBytes", "-", "Setting 'next' IFD to (0x%08x).", nextIfdOffsetToWrite)

		nextIfdOffset = nextIfdOffsetToWrite
	}

	err = bw.WriteUint32(nextIfdOffset)
	log.PanicIf(err)

	_, err = b.Write(dataBytes)
	log.PanicIf(err)

	// Append any child IFD blocks after our table and data blocks. These IFDs
	// were equipped with the appropriate offset information so it's expected
	// that all offsets referred to by these will be correct.
	//
	// Note that child-IFDs are appended after the current IFD and before the
	// next IFD, as opposed to the root IFDs, which are chained together but
	// will be interrupted by these child-IFDs (which is expected, per the
	// standard).

	for _, childIfdBlock := range childIfdBlocks {
		_, err = b.Write(childIfdBlock)
		log.PanicIf(err)
	}

	ibe.pushToJournal("encodeIfdToBytes", "<", "%s", ib)

	return b.Bytes(), tableSize, dataSize, childIfdSizes, nil
}

// encodeAndAttachIfd is a reentrant function that processes the IFD chain
// starting at `ib`, following the `nextIb` links. `ifdAddressableOffset` is
// the offset at which the first IFD in the chain will be written.
//
// Each IFD is encoded twice: once as a dry-run to learn the size of its table
// and allocated data, and then for real once the offset that follows them
// (where child IFDs and the next IFD go) is known. The returned `data` is the
// concatenation of every encoded IFD in the chain, each followed by its child
// IFDs.
func (ibe *IfdByteEncoder) encodeAndAttachIfd(ib *IfdBuilder, ifdAddressableOffset uint32) (data []byte, err error) {
	defer func() {
		if state := recover(); state != nil {
			err = log.Wrap(state.(error))
		}
	}()

	ibe.pushToJournal("encodeAndAttachIfd", ">", "%s", ib)

	b := new(bytes.Buffer)

	i := 0

	for thisIb := ib; thisIb != nil; thisIb = thisIb.nextIb {

		// Do a dry-run in order to pre-determine its size requirement.

		ibe.pushToJournal("encodeAndAttachIfd", ">", "Beginning encoding process: (%d) [%s]", i, thisIb.ifdPath)

		ibe.pushToJournal("encodeAndAttachIfd", ">", "Calculating size: (%d) [%s]", i, thisIb.ifdPath)

		_, tableSize, allocatedDataSize, _, err := ibe.encodeIfdToBytes(thisIb, ifdAddressableOffset, 0, false)
		log.PanicIf(err)

		ibe.pushToJournal("encodeAndAttachIfd", "<", "Finished calculating size: (%d) [%s]", i, thisIb.ifdPath)

		// The allocated data starts right after the table, and whatever comes
		// next (child IFDs, then the next IFD) starts right after that data.
		ifdAddressableOffset += tableSize
		nextIfdOffsetToWrite := ifdAddressableOffset + allocatedDataSize

		// This is a standalone note rather than the start of a nested step,
		// so it must not open a level (nothing would ever close it and the
		// journal would be left unbalanced).
		ibe.pushToJournal("encodeAndAttachIfd", "-", "Next IFD will be written at offset (0x%08x)", nextIfdOffsetToWrite)

		// Write our IFD as well as any child-IFDs (now that we know the offset
		// where new IFDs and their data will be allocated).

		setNextIb := thisIb.nextIb != nil

		ibe.pushToJournal("encodeAndAttachIfd", ">", "Encoding starting: (%d) [%s] NEXT-IFD-OFFSET-TO-WRITE=(0x%08x)", i, thisIb.ifdPath, nextIfdOffsetToWrite)

		tableAndAllocated, effectiveTableSize, effectiveAllocatedDataSize, childIfdSizes, err :=
			ibe.encodeIfdToBytes(thisIb, ifdAddressableOffset, nextIfdOffsetToWrite, setNextIb)

		log.PanicIf(err)

		// The real run must agree with the dry-run, or the offsets that were
		// derived from the dry-run are wrong.
		if effectiveTableSize != tableSize {
			log.Panicf("written table size does not match the pre-calculated table size: (%d) != (%d) %s", effectiveTableSize, tableSize, thisIb)
		} else if effectiveAllocatedDataSize != allocatedDataSize {
			log.Panicf("written allocated-data size does not match the pre-calculated allocated-data size: (%d) != (%d) %s", effectiveAllocatedDataSize, allocatedDataSize, thisIb)
		}

		ibe.pushToJournal("encodeAndAttachIfd", "<", "Encoding done: (%d) [%s]", i, thisIb.ifdPath)

		totalChildIfdSize := uint32(0)
		for _, childIfdSize := range childIfdSizes {
			totalChildIfdSize += childIfdSize
		}

		if len(tableAndAllocated) != int(tableSize+allocatedDataSize+totalChildIfdSize) {
			log.Panicf("IFD table and data is not a consistent size: (%d) != (%d)", len(tableAndAllocated), tableSize+allocatedDataSize+totalChildIfdSize)
		}

		// TODO(dustin): We might want to verify the original tableAndAllocated length, too.

		_, err = b.Write(tableAndAllocated)
		log.PanicIf(err)

		// Advance past what we've allocated, thus far.

		ifdAddressableOffset += allocatedDataSize + totalChildIfdSize

		ibe.pushToJournal("encodeAndAttachIfd", "<", "Finishing encoding process: (%d) [%s] [FINAL:] NEXT-IFD-OFFSET-TO-WRITE=(0x%08x)", i, thisIb.ifdPath, nextIfdOffsetToWrite)

		i++
	}

	ibe.pushToJournal("encodeAndAttachIfd", "<", "%s", ib)

	return b.Bytes(), nil
}

// EncodeToExifPayload encodes the whole IB structure, starting at `ib`, into
// the bytes of its IFDs as they would be laid out with the first IFD placed at
// ExifDefaultFirstIfdOffset. No EXIF header is included in the result.
func (ibe *IfdByteEncoder) EncodeToExifPayload(ib *IfdBuilder) (data []byte, err error) {
	defer func() {
		state := recover()
		if state == nil {
			return
		}

		err = log.Wrap(state.(error))
	}()

	if data, err = ibe.encodeAndAttachIfd(ib, ExifDefaultFirstIfdOffset); err != nil {
		log.PanicIf(err)
	}

	return data, nil
}

// EncodeToExif produces a complete EXIF block for `ib`: the header built by
// BuildExifHeader() immediately followed by the payload produced by
// EncodeToExifPayload().
func (ibe *IfdByteEncoder) EncodeToExif(ib *IfdBuilder) (data []byte, err error) {
	defer func() {
		state := recover()
		if state == nil {
			return
		}

		err = log.Wrap(state.(error))
	}()

	payload, err := ibe.EncodeToExifPayload(ib)
	log.PanicIf(err)

	// Wrap the IFDs in a formal EXIF block: header first, then the IFDs.

	header, err := BuildExifHeader(ib.byteOrder, ExifDefaultFirstIfdOffset)
	log.PanicIf(err)

	var block bytes.Buffer
	for _, part := range [][]byte{header, payload} {
		_, err = block.Write(part)
		log.PanicIf(err)
	}

	return block.Bytes(), nil
}