1
0
mirror of https://github.com/dsoprea/go-exif.git synced 2025-04-27 13:12:39 +00:00
go-exif/v3/common/value_context.go
Dustin Oprea 312218d7b1 Use encapsulated data/reader rather than bytes
Given a stream of data, it is possible to determine the beginning of
EXIF data but not the end. Therefore, either an image-aware
implementation must know how to parse an image and extract the EXIF
data or a brute-force search implementation (one of which is provided
by this project) must find the start anchor and then return all bytes
from that to the end of the file.

We have been made aware of some use-cases where a brute-force search
might be unavoidable due to trust or stability issues with the image
structure. This leads to large allocations. This can be avoided by
accomodating support that will allow for both a byte-slice or an
`io.ReadSeeker`. Since the EXIF structure is typically not read-
intensive (a couple of kilobytes if no thumbnail is present), this
should have a minimal performance impact.

Closes 
2020-07-11 12:12:48 -04:00

425 lines
11 KiB
Go

package exifcommon
import (
"errors"
"io"
"encoding/binary"
"github.com/dsoprea/go-logging"
)
var (
parser *Parser
)
var (
// ErrNotFarValue indicates that an offset-based lookup was attempted for a
// non-offset-based (embedded) value.
ErrNotFarValue = errors.New("not a far value")
)
// ValueContext embeds all of the parameters required to find and extract the
// actual tag value.
type ValueContext struct {
unitCount uint32
valueOffset uint32
rawValueOffset []byte
rs io.ReadSeeker
tagType TagTypePrimitive
byteOrder binary.ByteOrder
// undefinedValueTagType is the effective type to use if this is an
// "undefined" value.
undefinedValueTagType TagTypePrimitive
ifdPath string
tagId uint16
}
// TODO(dustin): We can update newValueContext() to derive `valueOffset` itself (from `rawValueOffset`).
// NewValueContext returns a new ValueContext struct.
func NewValueContext(ifdPath string, tagId uint16, unitCount, valueOffset uint32, rawValueOffset []byte, rs io.ReadSeeker, tagType TagTypePrimitive, byteOrder binary.ByteOrder) *ValueContext {
return &ValueContext{
unitCount: unitCount,
valueOffset: valueOffset,
rawValueOffset: rawValueOffset,
rs: rs,
tagType: tagType,
byteOrder: byteOrder,
ifdPath: ifdPath,
tagId: tagId,
}
}
// SetUndefinedValueType sets the effective type if this is an unknown-type tag.
func (vc *ValueContext) SetUndefinedValueType(tagType TagTypePrimitive) {
if vc.tagType != TypeUndefined {
log.Panicf("can not set effective type for unknown-type tag because this is *not* an unknown-type tag")
}
vc.undefinedValueTagType = tagType
}
// UnitCount returns the embedded unit-count.
func (vc *ValueContext) UnitCount() uint32 {
return vc.unitCount
}
// ValueOffset returns the value-offset decoded as a `uint32`.
func (vc *ValueContext) ValueOffset() uint32 {
return vc.valueOffset
}
// RawValueOffset returns the uninterpreted value-offset. This is used for
// embedded values (values small enough to fit within the offset bytes rather
// than needing to be stored elsewhere and referred to by an actual offset).
func (vc *ValueContext) RawValueOffset() []byte {
return vc.rawValueOffset
}
// AddressableData returns the block of data that we can dereference into.
func (vc *ValueContext) AddressableData() io.ReadSeeker {
// RELEASE)dustin): Rename from AddressableData() to ReadSeeker()
return vc.rs
}
// ByteOrder returns the byte-order of numbers.
func (vc *ValueContext) ByteOrder() binary.ByteOrder {
return vc.byteOrder
}
// IfdPath returns the path of the IFD containing this tag.
func (vc *ValueContext) IfdPath() string {
return vc.ifdPath
}
// TagId returns the ID of the tag that we represent.
func (vc *ValueContext) TagId() uint16 {
return vc.tagId
}
// isEmbedded returns whether the value is embedded or a reference. This can't
// be precalculated since the size is not defined for all types (namely the
// "undefined" types).
func (vc *ValueContext) isEmbedded() bool {
tagType := vc.effectiveValueType()
return (tagType.Size() * int(vc.unitCount)) <= 4
}
// SizeInBytes returns the number of bytes that this value requires. The
// underlying call will panic if the type is UNDEFINED. It is the
// responsibility of the caller to preemptively check that.
func (vc *ValueContext) SizeInBytes() int {
tagType := vc.effectiveValueType()
return tagType.Size() * int(vc.unitCount)
}
// effectiveValueType returns the effective type of the unknown-type tag or, if
// not unknown, the actual type.
func (vc *ValueContext) effectiveValueType() (tagType TagTypePrimitive) {
if vc.tagType == TypeUndefined {
tagType = vc.undefinedValueTagType
if tagType == 0 {
log.Panicf("undefined-value type not set")
}
} else {
tagType = vc.tagType
}
return tagType
}
// readRawEncoded returns the encoded bytes for the value that we represent.
func (vc *ValueContext) readRawEncoded() (rawBytes []byte, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
tagType := vc.effectiveValueType()
unitSizeRaw := uint32(tagType.Size())
if vc.isEmbedded() == true {
byteLength := unitSizeRaw * vc.unitCount
return vc.rawValueOffset[:byteLength], nil
}
_, err = vc.rs.Seek(int64(vc.valueOffset), io.SeekStart)
log.PanicIf(err)
rawBytes = make([]byte, vc.unitCount*unitSizeRaw)
_, err = io.ReadFull(vc.rs, rawBytes)
log.PanicIf(err)
return rawBytes, nil
}
// GetFarOffset returns the offset if the value is not embedded [within the
// pointer itself] or an error if an embedded value.
func (vc *ValueContext) GetFarOffset() (offset uint32, err error) {
if vc.isEmbedded() == true {
return 0, ErrNotFarValue
}
return vc.valueOffset, nil
}
// ReadRawEncoded returns the encoded bytes for the value that we represent.
func (vc *ValueContext) ReadRawEncoded() (rawBytes []byte, err error) {
// TODO(dustin): Remove this method and rename readRawEncoded in its place.
return vc.readRawEncoded()
}
// Format returns a string representation for the value.
//
// Where the type is not ASCII, `justFirst` indicates whether to just stringify
// the first item in the slice (or return an empty string if the slice is
// empty).
//
// Since this method lacks the information to process undefined-type tags (e.g.
// byte-order, tag-ID, IFD type), it will return an error if attempted. See
// `Undefined()`.
func (vc *ValueContext) Format() (value string, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
rawBytes, err := vc.readRawEncoded()
log.PanicIf(err)
phrase, err := FormatFromBytes(rawBytes, vc.effectiveValueType(), false, vc.byteOrder)
log.PanicIf(err)
return phrase, nil
}
// FormatFirst is similar to `Format` but only gets and stringifies the first
// item.
func (vc *ValueContext) FormatFirst() (value string, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
rawBytes, err := vc.readRawEncoded()
log.PanicIf(err)
phrase, err := FormatFromBytes(rawBytes, vc.tagType, true, vc.byteOrder)
log.PanicIf(err)
return phrase, nil
}
// ReadBytes parses the encoded byte-array from the value-context.
func (vc *ValueContext) ReadBytes() (value []byte, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
rawValue, err := vc.readRawEncoded()
log.PanicIf(err)
value, err = parser.ParseBytes(rawValue, vc.unitCount)
log.PanicIf(err)
return value, nil
}
// ReadAscii parses the encoded NUL-terminated ASCII string from the value-
// context.
func (vc *ValueContext) ReadAscii() (value string, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
rawValue, err := vc.readRawEncoded()
log.PanicIf(err)
value, err = parser.ParseAscii(rawValue, vc.unitCount)
log.PanicIf(err)
return value, nil
}
// ReadAsciiNoNul parses the non-NUL-terminated encoded ASCII string from the
// value-context.
func (vc *ValueContext) ReadAsciiNoNul() (value string, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
rawValue, err := vc.readRawEncoded()
log.PanicIf(err)
value, err = parser.ParseAsciiNoNul(rawValue, vc.unitCount)
log.PanicIf(err)
return value, nil
}
// ReadShorts parses the list of encoded shorts from the value-context.
func (vc *ValueContext) ReadShorts() (value []uint16, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
rawValue, err := vc.readRawEncoded()
log.PanicIf(err)
value, err = parser.ParseShorts(rawValue, vc.unitCount, vc.byteOrder)
log.PanicIf(err)
return value, nil
}
// ReadLongs parses the list of encoded, unsigned longs from the value-context.
func (vc *ValueContext) ReadLongs() (value []uint32, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
rawValue, err := vc.readRawEncoded()
log.PanicIf(err)
value, err = parser.ParseLongs(rawValue, vc.unitCount, vc.byteOrder)
log.PanicIf(err)
return value, nil
}
// ReadRationals parses the list of encoded, unsigned rationals from the value-
// context.
func (vc *ValueContext) ReadRationals() (value []Rational, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
rawValue, err := vc.readRawEncoded()
log.PanicIf(err)
value, err = parser.ParseRationals(rawValue, vc.unitCount, vc.byteOrder)
log.PanicIf(err)
return value, nil
}
// ReadSignedLongs parses the list of encoded, signed longs from the value-context.
func (vc *ValueContext) ReadSignedLongs() (value []int32, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
rawValue, err := vc.readRawEncoded()
log.PanicIf(err)
value, err = parser.ParseSignedLongs(rawValue, vc.unitCount, vc.byteOrder)
log.PanicIf(err)
return value, nil
}
// ReadSignedRationals parses the list of encoded, signed rationals from the
// value-context.
func (vc *ValueContext) ReadSignedRationals() (value []SignedRational, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
rawValue, err := vc.readRawEncoded()
log.PanicIf(err)
value, err = parser.ParseSignedRationals(rawValue, vc.unitCount, vc.byteOrder)
log.PanicIf(err)
return value, nil
}
// Values knows how to resolve the given value. This value is always a list
// (undefined-values aside), so we're named accordingly.
//
// Since this method lacks the information to process unknown-type tags (e.g.
// byte-order, tag-ID, IFD type), it will return an error if attempted. See
// `Undefined()`.
func (vc *ValueContext) Values() (values interface{}, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
}
}()
if vc.tagType == TypeByte {
values, err = vc.ReadBytes()
log.PanicIf(err)
} else if vc.tagType == TypeAscii {
values, err = vc.ReadAscii()
log.PanicIf(err)
} else if vc.tagType == TypeAsciiNoNul {
values, err = vc.ReadAsciiNoNul()
log.PanicIf(err)
} else if vc.tagType == TypeShort {
values, err = vc.ReadShorts()
log.PanicIf(err)
} else if vc.tagType == TypeLong {
values, err = vc.ReadLongs()
log.PanicIf(err)
} else if vc.tagType == TypeRational {
values, err = vc.ReadRationals()
log.PanicIf(err)
} else if vc.tagType == TypeSignedLong {
values, err = vc.ReadSignedLongs()
log.PanicIf(err)
} else if vc.tagType == TypeSignedRational {
values, err = vc.ReadSignedRationals()
log.PanicIf(err)
} else if vc.tagType == TypeUndefined {
log.Panicf("will not parse undefined-type value")
// Never called.
return nil, nil
} else {
log.Panicf("value of type [%s] is unparseable", vc.tagType)
// Never called.
return nil, nil
}
return values, nil
}
func init() {
parser = new(Parser)
}