go-exif/ifd_enumerate.go

529 lines
14 KiB
Go

package exif
import (
"bytes"
"fmt"
"strings"
"encoding/binary"
"github.com/dsoprea/go-logging"
)
var (
ifdEnumerateLogger = log.NewLogger("exifjpeg.ifd")
)
// IfdTagEnumerator knows how to decode an IFD and all of the tags it
// describes. Note that the IFDs and the actual values floating throughout the
// whole EXIF block, but the IFD itself has just a minor header and a set of
// repeating, statically-sized records. So, the tags (though not their values)
// are fairly simple to enumerate.
type IfdTagEnumerator struct {
byteOrder binary.ByteOrder
addressableData []byte
ifdOffset uint32
buffer *bytes.Buffer
}
func NewIfdTagEnumerator(addressableData []byte, byteOrder binary.ByteOrder, ifdOffset uint32) (ite *IfdTagEnumerator) {
ite = &IfdTagEnumerator{
addressableData: addressableData,
byteOrder: byteOrder,
buffer: bytes.NewBuffer(addressableData[ifdOffset:]),
}
return ite
}
// getUint16 reads a uint16 and advances both our current and our current
// accumulator (which allows us to know how far to seek to the beginning of the
// next IFD when it's time to jump).
func (ife *IfdTagEnumerator) getUint16() (value uint16, raw []byte, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
raw = make([]byte, 2)
_, err = ife.buffer.Read(raw)
log.PanicIf(err)
if ife.byteOrder == binary.BigEndian {
value = binary.BigEndian.Uint16(raw)
} else {
value = binary.LittleEndian.Uint16(raw)
}
return value, raw, nil
}
// getUint32 reads a uint32 and advances both our current and our current
// accumulator (which allows us to know how far to seek to the beginning of the
// next IFD when it's time to jump).
func (ife *IfdTagEnumerator) getUint32() (value uint32, raw []byte, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
raw = make([]byte, 4)
_, err = ife.buffer.Read(raw)
log.PanicIf(err)
if ife.byteOrder == binary.BigEndian {
value = binary.BigEndian.Uint32(raw)
} else {
value = binary.LittleEndian.Uint32(raw)
}
return value, raw, nil
}
type IfdEnumerate struct {
exifData []byte
buffer *bytes.Buffer
byteOrder binary.ByteOrder
currentOffset uint32
}
func NewIfdEnumerate(exifData []byte, byteOrder binary.ByteOrder) *IfdEnumerate {
// Make it obvious what data we expect and when we don't get it.
if IsExif(exifData) == false {
log.Panicf("not exif data")
}
return &IfdEnumerate{
exifData: exifData,
buffer: bytes.NewBuffer(exifData),
byteOrder: byteOrder,
}
}
// ValueContext describes all of the parameters required to find and extract
// the actual tag value.
type ValueContext struct {
UnitCount uint32
ValueOffset uint32
RawValueOffset []byte
AddressableData []byte
}
func (ie *IfdEnumerate) getTagEnumerator(ifdOffset uint32) (ite *IfdTagEnumerator) {
ite = NewIfdTagEnumerator(
ie.exifData[ExifAddressableAreaStart:],
ie.byteOrder,
ifdOffset)
return ite
}
// TagVisitor is an optional callback that can get hit for every tag we parse
// through. `addressableData` is the byte array startign after the EXIF header
// (where the offsets of all IFDs and values are calculated from).
type TagVisitor func(indexedIfdName string, tagId uint16, tagType TagType, valueContext ValueContext) (err error)
type IfdTagEntry struct {
TagId uint16
TagIndex int
TagType uint16
UnitCount uint32
ValueOffset uint32
RawValueOffset []byte
// ChildIfdName is a name if this tag represents a child IFD.
ChildIfdName string
// IfdName is the IFD that this tag belongs to.
IfdName string
}
func (ite IfdTagEntry) String() string {
return fmt.Sprintf("IfdTagEntry<TAG-IFD=[%s] TAG-ID=(0x%02x) TAG-TYPE=[%s] UNIT-COUNT=(%d)>", ite.ChildIfdName, ite.TagId, TypeNames[ite.TagType], ite.UnitCount)
}
func (ite IfdTagEntry) ValueBytes(addressableData []byte, byteOrder binary.ByteOrder) (value []byte, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
if ite.TagType == TypeUndefined {
valueContext := ValueContext{
UnitCount: ite.UnitCount,
ValueOffset: ite.ValueOffset,
RawValueOffset: ite.RawValueOffset,
AddressableData: addressableData,
}
value, err := UndefinedValue(ite.IfdName, ite.TagId, valueContext, byteOrder)
log.PanicIf(err)
switch value.(type) {
case []byte:
return value.([]byte), nil
case string:
return []byte(value.(string)), nil
default:
// TODO(dustin): !! Finish translating the rest of the types (make reusable and replace into other similar implementations?)
log.Panicf("can not produce bytes for unknown-type tag (0x%02x)", ite.TagId)
}
}
originalType := NewTagType(ite.TagType, byteOrder)
byteCount := uint32(originalType.Size()) * ite.UnitCount
tt := NewTagType(TypeByte, byteOrder)
if tt.ValueIsEmbedded(byteCount) == true {
typeDecodeLogger.Debugf(nil, "Reading BYTE value (ITE; embedded).")
// In this case, the bytes normally used for the offset are actually
// data.
value, err = tt.ParseBytes(ite.RawValueOffset, byteCount)
log.PanicIf(err)
} else {
typeDecodeLogger.Debugf(nil, "Reading BYTE value (ITE; at offset).")
value, err = tt.ParseBytes(addressableData[ite.ValueOffset:], byteCount)
log.PanicIf(err)
}
return value, nil
}
type IfdTagEntryValueResolver struct {
addressableData []byte
byteOrder binary.ByteOrder
}
func NewIfdTagEntryValueResolver(exifData []byte, byteOrder binary.ByteOrder) (itevr *IfdTagEntryValueResolver) {
// Make it obvious what data we expect and when we don't get it.
if IsExif(exifData) == false {
log.Panicf("not exif data")
}
return &IfdTagEntryValueResolver{
addressableData: exifData[ExifAddressableAreaStart:],
byteOrder: byteOrder,
}
}
func (itevr *IfdTagEntryValueResolver) ValueBytes(ite *IfdTagEntry) (value []byte, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
value, err = ite.ValueBytes(itevr.addressableData, itevr.byteOrder)
return value, err
}
// ParseIfd decodes the IFD block that we're currently sitting on the first
// byte of.
func (ie *IfdEnumerate) ParseIfd(ifdName string, ifdIndex int, ifdOffset uint32, visitor TagVisitor, doDescend bool) (nextIfdOffset uint32, entries []IfdTagEntry, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
ifdEnumerateLogger.Debugf(nil, "Parsing IFD [%s] (%d) at offset (%04x).", ifdName, ifdIndex, ifdOffset)
// Return the name of the IFD as its known in our tag-index. We should skip
// over the current IFD if this is empty (which means we don't recognize/
// understand the IFD and, therefore, don't know the tags that are valid for
// it). Note that we could leave ignoring the tags as a responsibility for
// the visitor, but then it'd be easy for people to integrate that logic and
// not realize that they needed to specially handle an empty IFD name until
// they happened upon some obscure media one day and suddenly have issue if
// they unwittingly write something that breaks in that situation.
indexedIfdName := IfdName(ifdName, ifdIndex)
if indexedIfdName == "" {
ifdEnumerateLogger.Debugf(nil, "IFD not known and will not be visited: [%s] (%d)", ifdName, ifdIndex)
}
ite := ie.getTagEnumerator(ifdOffset)
tagCount, _, err := ite.getUint16()
log.PanicIf(err)
ifdEnumerateLogger.Debugf(nil, "Current IFD tag-count: (%d)", tagCount)
entries = make([]IfdTagEntry, tagCount)
for i := uint16(0); i < tagCount; i++ {
tagId, _, err := ite.getUint16()
log.PanicIf(err)
tagType, _, err := ite.getUint16()
log.PanicIf(err)
unitCount, _, err := ite.getUint32()
log.PanicIf(err)
valueOffset, rawValueOffset, err := ite.getUint32()
log.PanicIf(err)
if visitor != nil && indexedIfdName != "" {
tt := NewTagType(tagType, ie.byteOrder)
vc := ValueContext{
UnitCount: unitCount,
ValueOffset: valueOffset,
RawValueOffset: rawValueOffset,
AddressableData: ie.exifData[ExifAddressableAreaStart:],
}
err := visitor(indexedIfdName, tagId, tt, vc)
log.PanicIf(err)
}
tag := IfdTagEntry{
IfdName: ifdName,
TagId: tagId,
TagIndex: int(i),
TagType: tagType,
UnitCount: unitCount,
ValueOffset: valueOffset,
RawValueOffset: rawValueOffset,
}
childIfdName, isIfd := IsIfdTag(tagId)
if isIfd == true {
tag.ChildIfdName = childIfdName
if doDescend == true {
ifdEnumerateLogger.Debugf(nil, "Descending to IFD [%s].", childIfdName)
err := ie.Scan(childIfdName, valueOffset, visitor)
log.PanicIf(err)
}
}
entries[i] = tag
}
nextIfdOffset, _, err = ite.getUint32()
log.PanicIf(err)
ifdEnumerateLogger.Debugf(nil, "Next IFD at offset: (%08x)", nextIfdOffset)
return nextIfdOffset, entries, nil
}
// Scan enumerates the different EXIF blocks (called IFDs).
func (ie *IfdEnumerate) Scan(ifdName string, ifdOffset uint32, visitor TagVisitor) (err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
for ifdIndex := 0;; ifdIndex++ {
nextIfdOffset, _, err := ie.ParseIfd(ifdName, ifdIndex, ifdOffset, visitor, true)
log.PanicIf(err)
if nextIfdOffset == 0 {
break
}
ifdOffset = nextIfdOffset
}
return nil
}
type Ifd struct {
Id int
ParentIfd *Ifd
Name string
Index int
Offset uint32
// TODO(dustin): !! Add a find method.
Entries []IfdTagEntry
Children []*Ifd
NextIfdOffset uint32
NextIfd *Ifd
}
func (ifd Ifd) String() string {
parentOffset := uint32(0)
if ifd.ParentIfd != nil {
parentOffset = ifd.ParentIfd.Offset
}
return fmt.Sprintf("IFD<ID=(%d) N=[%s] IDX=(%d) OFF=(0x%04x) COUNT=(%d) CHILDREN=(%d) PARENT=(0x%04x) NEXT-IFD=(0x%04x)", ifd.Id, ifd.Name, ifd.Index, ifd.Offset, len(ifd.Entries), len(ifd.Children), parentOffset, ifd.NextIfdOffset)
}
func (ifd Ifd) printNode(level int, nextLink bool) {
indent := strings.Repeat(" ", level * 2)
prefix := " "
if nextLink {
prefix = ">"
}
fmt.Printf("%s%s%s\n", indent, prefix, ifd)
for _, childIfd := range ifd.Children {
childIfd.printNode(level + 1, false)
}
if ifd.NextIfd != nil {
ifd.NextIfd.printNode(level, true)
}
}
func (ifd Ifd) PrintTree() {
ifd.printNode(0, false)
}
type QueuedIfd struct {
Name string
Index int
Offset uint32
Parent *Ifd
}
type IfdIndex struct {
RootIfd *Ifd
Ifds []*Ifd
Tree map[int]*Ifd
Lookup map[string][]*Ifd
}
// Scan enumerates the different EXIF blocks (called IFDs).
func (ie *IfdEnumerate) Collect(rootIfdOffset uint32) (index IfdIndex, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
tree := make(map[int]*Ifd)
ifds := make([]*Ifd, 0)
lookup := make(map[string][]*Ifd)
queue := []QueuedIfd {
{
Name: IfdStandard,
Index: 0,
Offset: rootIfdOffset,
},
}
edges := make(map[uint32]*Ifd)
for {
if len(queue) == 0 {
break
}
name := queue[0].Name
index := queue[0].Index
offset := queue[0].Offset
parentIfd := queue[0].Parent
queue = queue[1:]
nextIfdOffset, entries, err := ie.ParseIfd(name, index, offset, nil, false)
log.PanicIf(err)
id := len(ifds)
ifd := Ifd{
Id: id,
ParentIfd: parentIfd,
Name: name,
Index: index,
Offset: offset,
Entries: entries,
Children: make([]*Ifd, 0),
NextIfdOffset: nextIfdOffset,
}
// Add ourselves to a big list of IFDs.
ifds = append(ifds, &ifd)
// Install ourselves into a by-id lookup table (keys are unique).
tree[id] = &ifd
// Install into by-name buckets.
if list_, found := lookup[name]; found == true {
lookup[name] = append(list_, &ifd)
} else {
list_ = make([]*Ifd, 1)
list_[0] = &ifd
lookup[name] = list_
}
// Add a link from the previous IFD in the chain to us.
if previousIfd, found := edges[offset]; found == true {
previousIfd.NextIfd = &ifd
}
// Attach as a child to our parent (where we appeared as a tag in
// that IFD).
if parentIfd != nil {
parentIfd.Children = append(parentIfd.Children, &ifd)
}
// Determine if any of our entries is a child IFD and queue it.
for _, entry := range entries {
if entry.ChildIfdName == "" {
continue
}
qi := QueuedIfd {
Name: entry.ChildIfdName,
Index: 0,
Offset: entry.ValueOffset,
Parent: &ifd,
}
queue = append(queue, qi)
}
// If there's another IFD in the chain.
if nextIfdOffset != 0 {
// Allow the next link to know what the previous link was.
edges[nextIfdOffset] = &ifd
qi := QueuedIfd {
Name: IfdStandard,
Index: index + 1,
Offset: nextIfdOffset,
}
queue = append(queue, qi)
}
}
index.RootIfd = tree[0]
index.Ifds = ifds
index.Tree = tree
index.Lookup = lookup
return index, nil
}