exif.go: Add support for seeking to multiple EXIF blobs (SearchAndExtractExifN)

- ifd_enumerate.go
  - Bugfixes for various broken log messages.
  - Bugfix for ErrTagNotFound not actually skipping.

- common/parser.go: Parsing ASCII now fails with ErrParseFail if there are
  binary (8-bit) characters.
  - utility.go: GetFlatExifData() just logs and skips these.

- exif.go: Add searchAndExtractExifWithReaderWithDiscarded(), to return
  how many bytes it seeks through.

Supports https://github.com/dsoprea/go-exif/issues/53
pull/56/head
Dustin Oprea 2021-01-31 15:22:57 -05:00
parent db167117f4
commit 0d9ceeb35b
5 changed files with 116 additions and 13 deletions

View File

@ -55,6 +55,7 @@ type parameters struct {
IsVerbose bool `short:"v" long:"verbose" description:"Print logging"`
ThumbnailOutputFilepath string `short:"t" long:"thumbnail-output-filepath" description:"File-path to write thumbnail to (if present)"`
DoNotPrintTags bool `short:"n" long:"no-tags" description:"Do not actually print tags. Good for auditing the logs or merely checking the EXIF structure for errors."`
SkipBlocks int `short:"s" long:"skip" description:"Skip this many EXIF blocks before returning"`
}
var (
@ -92,7 +93,7 @@ func main() {
data, err := ioutil.ReadAll(f)
log.PanicIf(err)
rawExif, err := exif.SearchAndExtractExif(data)
rawExif, err := exif.SearchAndExtractExifN(data, arguments.SkipBlocks)
if err != nil {
if err == exif.ErrNoExif {
fmt.Printf("No EXIF data.\n")
@ -107,7 +108,13 @@ func main() {
// Run the parse.
entries, _, err := exif.GetFlatExifData(rawExif, nil)
log.PanicIf(err)
if err != nil {
if arguments.SkipBlocks > 0 {
mainLogger.Warningf(nil, "Encountered an error. This might be related to the request to skip EXIF blocks.")
}
log.Panic(err)
}
// Write the thumbnail if requested and present.

View File

@ -2,6 +2,7 @@ package exifcommon
import (
"bytes"
"errors"
"math"
"encoding/binary"
@ -13,6 +14,10 @@ var (
parserLogger = log.NewLogger("exifcommon.parser")
)
var (
// ErrParseFail is the sentinel error for a value that could not be parsed.
// ParseAscii returns it when a string that is not nul-terminated contains a
// character above 127 (i.e. binary rather than ASCII data).
ErrParseFail = errors.New("parse failure")
)
// Parser decodes every well-defined, encoded EXIF type. It has no fields, so
// the zero value is ready to use.
type Parser struct{}
@ -59,6 +64,14 @@ func (p *Parser) ParseAscii(data []byte, unitCount uint32) (value string, err er
s := string(data[:count])
parserLogger.Warningf(nil, "ascii not terminated with nul as expected: [%v]", s)
for _, c := range s {
if c > 127 {
// Binary
return "", ErrParseFail
}
}
return s, nil
}

View File

@ -70,10 +70,58 @@ func SearchAndExtractExif(data []byte) (rawExif []byte, err error) {
return rawExif, nil
}
// SearchAndExtractExifWithReader searches for an EXIF blob using an
// `io.Reader`. We can't know how long the EXIF data is without parsing it,
// so this will likely grab up a lot of the image-data, too.
func SearchAndExtractExifWithReader(r io.Reader) (rawExif []byte, err error) {
// SearchAndExtractExifN searches for an EXIF blob in the byte-slice, but skips
// the given number of EXIF blocks first. This is a forensics tool that helps
// identify multiple EXIF blocks in a file.
//
// `n` is the number of blocks to skip before returning; zero (or a negative
// value) returns the first block found. ErrNoExif is returned as soon as a
// search comes up empty, which includes the case where fewer than n+1 blocks
// exist. Any other failure is panicked and then converted into a wrapped error
// by the deferred recover.
func SearchAndExtractExifN(data []byte, n int) (rawExif []byte, err error) {
	defer func() {
		if state := recover(); state != nil {
			err = log.Wrap(state.(error))
		}
	}()

	// baseOffset is the position, within the slice originally passed in, at
	// which the current search window starts.
	baseOffset := 0

	for skips := 0; ; skips++ {
		var discarded int

		rawExif, discarded, err = searchAndExtractExifWithReaderWithDiscarded(bytes.NewBuffer(data))
		if err != nil {
			if err == ErrNoExif {
				return nil, err
			}

			log.Panic(err)
		}

		exifLogger.Debugf(nil, "Read EXIF block (%d).", skips)

		if skips >= n {
			exifLogger.Debugf(nil, "Reached requested EXIF block (%d).", n)

			// Report the absolute position. This includes the extra byte that
			// was stepped over for every skipped block, which a plain sum of
			// the per-search discarded counts would miss.
			exifLogger.Debugf(nil, "Found EXIF blob (%d) bytes from initial position.", baseOffset+discarded)

			return rawExif, nil
		}

		// Step one byte past the start of the block just found so that the
		// next search can't match the same signature again.
		nextOffset := discarded + 1
		exifLogger.Debugf(nil, "Skipping EXIF block (%d) by seeking to position (%d).", skips, nextOffset)

		data = data[nextOffset:]
		baseOffset += nextOffset
	}
}
// searchAndExtractExifWithReaderWithDiscarded searches for an EXIF blob using
// an `io.Reader`. We can't know how long the EXIF data is without parsing
// it, so this will likely grab up a lot of the image-data, too.
//
// This function returns the count of preceding bytes.
func searchAndExtractExifWithReaderWithDiscarded(r io.Reader) (rawExif []byte, discarded int, err error) {
defer func() {
if state := recover(); state != nil {
err = log.Wrap(state.(error))
@ -85,13 +133,12 @@ func SearchAndExtractExifWithReader(r io.Reader) (rawExif []byte, err error) {
// least, again, with JPEGs).
br := bufio.NewReader(r)
discarded := 0
for {
window, err := br.Peek(ExifSignatureLength)
if err != nil {
if err == io.EOF {
return nil, ErrNoExif
return nil, 0, ErrNoExif
}
log.Panic(err)
@ -122,6 +169,30 @@ func SearchAndExtractExifWithReader(r io.Reader) (rawExif []byte, err error) {
rawExif, err = ioutil.ReadAll(br)
log.PanicIf(err)
return rawExif, discarded, nil
}
// RELEASE(dustin): We should replace the implementation of SearchAndExtractExifWithReader with searchAndExtractExifWithReaderWithDiscarded and drop the latter.
// SearchAndExtractExifWithReader scans an `io.Reader` for an EXIF blob. The
// length of the EXIF data can't be known without parsing it, so the returned
// slice will likely include a lot of the image-data that follows it, too.
func SearchAndExtractExifWithReader(r io.Reader) (rawExif []byte, err error) {
	defer func() {
		recovered := recover()
		if recovered == nil {
			return
		}

		err = log.Wrap(recovered.(error))
	}()

	// Delegate to the variant that also reports how many bytes preceded the
	// blob; that count is not exposed by this entry point.
	rawExif, _, err = searchAndExtractExifWithReaderWithDiscarded(r)

	if err == ErrNoExif {
		return nil, err
	}

	if err != nil {
		log.Panic(err)
	}

	return rawExif, nil
}

View File

@ -232,15 +232,15 @@ func (ie *IfdEnumerate) parseTag(ii *exifcommon.IfdIdentity, tagPosition int, bp
// if the type stored alongside the data disagrees with it,
// which it apparently does, all bets are off.
ifdEnumerateLogger.Warningf(nil,
"Tag (0x%04x) in IFD [%s] at position (%d) has invalid type and will be skipped.",
tagId, ii, tagPosition, tagType)
"Tag (0x%04x) in IFD [%s] at position (%d) has invalid type (0x%04x) and will be skipped.",
tagId, ii, tagPosition, int(tagType))
ite = &IfdTagEntry{
tagId: tagId,
tagType: tagType,
}
return nil, ErrTagTypeNotValid
return ite, ErrTagTypeNotValid
}
// Check whether the embedded type is listed among the supported types for
@ -250,6 +250,12 @@ func (ie *IfdEnumerate) parseTag(ii *exifcommon.IfdIdentity, tagPosition int, bp
if err != nil {
if log.Is(err, ErrTagNotFound) == true {
ifdEnumerateLogger.Warningf(nil, "Tag (0x%04x) is not known and will be skipped.", tagId)
ite = &IfdTagEntry{
tagId: tagId,
}
return ite, ErrTagNotFound
}
log.Panic(err)
@ -262,7 +268,7 @@ func (ie *IfdEnumerate) parseTag(ii *exifcommon.IfdIdentity, tagPosition int, bp
// suddenly have data that we no longer manipulate correctly/
// accurately.
ifdEnumerateLogger.Warningf(nil,
"Tag (0x%04x) in IFD [%s] at position (%d) has invalid type and will be skipped.",
"Tag (0x%04x) in IFD [%s] at position (%d) has unsupported type (%04x) and will be skipped.",
tagId, ii, tagPosition, tagType)
return nil, ErrTagTypeNotValid
@ -544,7 +550,7 @@ func (ie *IfdEnumerate) parseIfd(ii *exifcommon.IfdIdentity, bp *byteParser, vis
nextIfdOffset, _, err = bp.getUint32()
log.PanicIf(err)
ifdEnumerateLogger.Debugf(nil, "Next IFD at offset: (%08x)", nextIfdOffset)
ifdEnumerateLogger.Debugf(nil, "Next IFD at offset: (0x%08x)", nextIfdOffset)
return nextIfdOffset, entries, thumbnailData, nil
}

View File

@ -107,6 +107,12 @@ func GetFlatExifData(exifData []byte, so *ScanOptions) (exifTags []ExifTag, med
if err != nil {
if err == exifcommon.ErrUnhandledUndefinedTypedTag {
value = exifundefined.UnparseableUnknownTagValuePlaceholder
} else if log.Is(err, exifcommon.ErrParseFail) == true {
utilityLogger.Warningf(nil,
"Could not parse value for tag [%s] (%04x) [%s].",
ite.IfdPath(), ite.TagId(), ite.TagName())
return nil
} else {
log.Panic(err)
}