Browse Source

Improvements in field handling and utility functions

master
Gunnsteinn Hall 2 years ago
parent
commit
178727ca05
4 changed files with 145 additions and 21 deletions
  1. +1
    -0
      .gitattributes
  2. +100
    -10
      pdf/model/fields.go
  3. +27
    -10
      pdf/model/form.go
  4. +17
    -1
      pdf/model/reader.go

+ 1
- 0
.gitattributes View File

@ -0,0 +1 @@
* -crlf

+ 100
- 10
pdf/model/fields.go View File

@ -9,6 +9,7 @@ import (
"bytes"
"errors"
"fmt"
"strings"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/core"
@ -73,6 +74,74 @@ func (flag FieldFlag) Has(fl FieldFlag) bool {
return (flag.Mask() & fl.Mask()) > 0
}
// String returns a human-readable representation of the flags that are set.
// The names of the set flags are joined with "|" (for example "ReadOnly|Required").
// A value equal to FieldFlagClear yields "Clear"; a non-clear value in which none of the
// flags listed below is set yields an empty string.
func (flag FieldFlag) String() string {
	if flag == FieldFlagClear {
		return "Clear"
	}

	// Ordered table of flag bits and their display names. The order determines the
	// order of the names in the output.
	flagNames := []struct {
		bit  FieldFlag
		name string
	}{
		{FieldFlagReadOnly, "ReadOnly"},
		// Previously reported as "ReadOnly", which mislabelled required fields.
		{FieldFlagRequired, "Required"},
		{FieldFlagNoExport, "NoExport"},
		{FieldFlagNoToggleToOff, "NoToggleToOff"},
		{FieldFlagRadio, "Radio"},
		{FieldFlagPushbutton, "Pushbutton"},
		{FieldFlagRadiosInUnision, "RadiosInUnision"},
		{FieldFlagMultiline, "Multiline"},
		{FieldFlagPassword, "Password"},
		{FieldFlagFileSelect, "FileSelect"},
		{FieldFlagDoNotScroll, "DoNotScroll"},
		{FieldFlagComb, "Comb"},
		{FieldFlagRichText, "RichText"},
		{FieldFlagDoNotSpellCheck, "DoNotSpellCheck"},
		{FieldFlagCombo, "Combo"},
		{FieldFlagEdit, "Edit"},
		{FieldFlagSort, "Sort"},
		{FieldFlagMultiSelect, "MultiSelect"},
		{FieldFlagCommitOnSelChange, "CommitOnSelChange"},
	}

	var set []string
	for _, fn := range flagNames {
		if flag&fn.bit > 0 {
			set = append(set, fn.name)
		}
	}
	return strings.Join(set, "|")
}
// PdfField contains the common attributes of a form field. The context object contains the specific field data
// which can represent a button, text, choice or signature.
// The PdfField is typically not used directly, but is encapsulated by the more specific field types such as
@ -83,11 +152,10 @@ type PdfField struct {
isTerminal *bool // If set: indicates whether is a terminal field (if null, may not be determined yet).
Parent *PdfField
Annotations []*PdfAnnotation
Annotations []*PdfAnnotationWidget
Kids []*PdfField
FT *core.PdfObjectName
//Kids *core.PdfObjectArray
T *core.PdfObjectString
TU *core.PdfObjectString
TM *core.PdfObjectString
@ -382,7 +450,7 @@ type PdfFieldSignature struct {
}
// ToPdfObject returns an indirect object containing the signature field dictionary.
func (sig *PdfFieldSignature) ToPdfObject() *core.PdfIndirectObject {
func (sig *PdfFieldSignature) ToPdfObject() core.PdfObject {
// Set general field attributes
sig.PdfField.ToPdfObject()
container := sig.container
@ -463,7 +531,7 @@ func (f *PdfField) Flags() FieldFlag {
common.Log.Debug("Error evaluating flags via inheritance: %v", err)
}
if !found {
common.Log.Debug("No field flags found")
common.Log.Trace("No field flags found - assume clear")
}
return flags
@ -549,7 +617,15 @@ func (r *PdfReader) newPdfFieldFromIndirectObject(container *core.PdfIndirectObj
}
ctx.PdfField = field
field.context = ctx
case "Sig":
ctx, err := newPdfFieldSignatureFromDict(d)
if err != nil {
return nil, err
}
ctx.PdfField = field
field.context = ctx
default:
common.Log.Debug("Unsupported field type %s", *field.FT)
return nil, errors.New("Unsupported field type")
}
}
@ -566,10 +642,10 @@ func (r *PdfReader) newPdfFieldFromIndirectObject(container *core.PdfIndirectObj
field.Parent = parent
}
field.Annotations = []*PdfAnnotation{}
field.Annotations = []*PdfAnnotationWidget{}
// Has a merged-in widget annotation?
if name := d.GetDirect("Subtype").(*core.PdfObjectName); name != nil {
if name, _ := d.GetDirect("Subtype").(*core.PdfObjectName); name != nil {
if *name == "Widget" {
// Is a merged field / widget dict.
@ -585,7 +661,7 @@ func (r *PdfReader) newPdfFieldFromIndirectObject(container *core.PdfIndirectObj
}
widget.Parent = field.GetContainingPdfObject()
field.Annotations = append(field.Annotations, annot)
field.Annotations = append(field.Annotations, widget)
return field, nil
}
@ -613,12 +689,16 @@ func (r *PdfReader) newPdfFieldFromIndirectObject(container *core.PdfIndirectObj
// Widget annotations contain key Subtype with value equal to /Widget. Otherwise are assumed to be fields.
if name, has := dict.GetDirect("Subtype").(*core.PdfObjectName); has && *name == "Widget" {
widg, err := r.newPdfAnnotationFromIndirectObject(container)
annot, err := r.newPdfAnnotationFromIndirectObject(container)
if err != nil {
common.Log.Debug("Error loading widget annotation for field: %v", err)
return nil, err
}
field.Annotations = append(field.Annotations, widg)
wa, ok := annot.context.(*PdfAnnotationWidget)
if !ok {
return nil, ErrTypeCheck
}
field.Annotations = append(field.Annotations, wa)
} else {
childf, err := r.newPdfFieldFromIndirectObject(container, field)
if err != nil {
@ -642,7 +722,7 @@ func newPdfFieldTextFromDict(d *core.PdfObjectDictionary) (*PdfFieldText, error)
textf.DS, _ = d.GetDirect("DS").(*core.PdfObjectString)
textf.RV = d.Get("RV")
// TODO: MaxLen should be loaded for other fields too?
textf.MaxLen = d.Get("MaxLen").(*core.PdfObjectInteger)
textf.MaxLen, _ = d.Get("MaxLen").(*core.PdfObjectInteger)
return textf, nil
}
@ -663,3 +743,13 @@ func newPdfFieldButtonFromDict(d *core.PdfObjectDictionary) (*PdfFieldButton, er
buttonf.Opt, _ = d.GetDirect("Opt").(*core.PdfObjectArray)
return buttonf, nil
}
// newPdfFieldSignatureFromDict builds a PdfFieldSignature (a signature field) from the dictionary d.
// Only the signature-specific entries V, Lock and SV are read here; the generic field attributes are
// handled by the more general field loader that calls this function.
// An entry that is missing or is not an indirect object is left nil. The returned error is always nil.
func newPdfFieldSignatureFromDict(d *core.PdfObjectDictionary) (*PdfFieldSignature, error) {
	// The comma-ok assertions leave the value nil when the entry has a different type.
	value, _ := d.Get("V").(*core.PdfIndirectObject)
	lock, _ := d.Get("Lock").(*core.PdfIndirectObject)
	seedValue, _ := d.Get("SV").(*core.PdfIndirectObject)

	return &PdfFieldSignature{
		V:    value,
		Lock: lock,
		SV:   seedValue,
	}, nil
}

+ 27
- 10
pdf/model/form.go View File

@ -7,6 +7,7 @@ package model
import (
"fmt"
"github.com/unidoc/unidoc/common"
"github.com/unidoc/unidoc/pdf/core"
)
@ -17,10 +18,9 @@ Btn = button
Tx = text
Ch = choice
Sig = signature
*/
*/
// PdfAcroForm represents the AcroForm dictionary used for representation of forms
// in PDF.
// PdfAcroForm represents the AcroForm dictionary used for representation of form data in PDF.
type PdfAcroForm struct {
Fields *[]*PdfField
NeedAppearances *core.PdfObjectBool
@ -31,7 +31,7 @@ type PdfAcroForm struct {
Q *core.PdfObjectInteger
XFA core.PdfObject
primitive *core.PdfIndirectObject
container *core.PdfIndirectObject
}
// NewPdfAcroForm returns a new PdfAcroForm with an initialized container (indirect object).
@ -40,11 +40,31 @@ func NewPdfAcroForm() *PdfAcroForm {
container := &core.PdfIndirectObject{}
container.PdfObject = core.MakeDict()
acroForm.container = container
acroForm.primitive = container
return acroForm
}
// flattenFields returns the given field followed by all of its descendants (reached through Kids).
// Each kid's complete subtree is listed before the next kid is visited.
func flattenFields(field *PdfField) []*PdfField {
	flat := []*PdfField{field}
	for i := 0; i < len(field.Kids); i++ {
		subtree := flattenFields(field.Kids[i])
		flat = append(flat, subtree...)
	}
	return flat
}
// AllFields returns every field of the form as a single flat list: each top-level field
// is followed by its descendants. The result is an empty (non-nil) slice when the form
// has no Fields set.
func (form *PdfAcroForm) AllFields() []*PdfField {
	all := []*PdfField{}
	if form.Fields == nil {
		return all
	}
	for _, top := range *form.Fields {
		all = append(all, flattenFields(top)...)
	}
	return all
}
// newPdfAcroFormFromDict is used when loading forms from PDF files.
func (r *PdfReader) newPdfAcroFormFromDict(d *core.PdfObjectDictionary) (*PdfAcroForm, error) {
acroForm := NewPdfAcroForm()
@ -154,13 +174,13 @@ func (r *PdfReader) newPdfAcroFormFromDict(d *core.PdfObjectDictionary) (*PdfAcr
// GetContainingPdfObject returns the container of the PdfAcroForm (indirect object).
func (this *PdfAcroForm) GetContainingPdfObject() core.PdfObject {
return this.primitive
return this.container
}
// ToPdfObject converts PdfAcroForm to a PdfObject, i.e. an indirect object containing the
// AcroForm dictionary.
func (this *PdfAcroForm) ToPdfObject() core.PdfObject {
container := this.primitive
container := this.container
dict := container.PdfObject.(*core.PdfObjectDictionary)
if this.Fields != nil {
@ -176,7 +196,6 @@ func (this *PdfAcroForm) ToPdfObject() core.PdfObject {
}
if this.SigFlags != nil {
dict.Set("SigFlags", this.SigFlags)
}
if this.CO != nil {
dict.Set("CO", this.CO)
@ -196,5 +215,3 @@ func (this *PdfAcroForm) ToPdfObject() core.PdfObject {
return container
}

+ 17
- 1
pdf/model/reader.go View File

@ -695,7 +695,8 @@ func (this *PdfReader) traverseObjectData(o PdfObject) error {
return nil
}
// Get a page by the page number. Indirect object with type /Page.
// GetPageAsIndirectObject returns the indirect object representing a page for a given page number.
// Indirect object with type /Page.
func (this *PdfReader) GetPageAsIndirectObject(pageNumber int) (PdfObject, error) {
if this.parser.GetCrypter() != nil && !this.parser.IsAuthenticated() {
return nil, fmt.Errorf("File needs to be decrypted first")
@ -706,6 +707,7 @@ func (this *PdfReader) GetPageAsIndirectObject(pageNumber int) (PdfObject, error
page := this.pageList[pageNumber-1]
// Look up all references related to page and load everything.
// XXX/TODO: Use of traverse object data will be limited when lazy-loading is supported.
err := this.traverseObjectData(page)
if err != nil {
return nil, err
@ -716,6 +718,20 @@ func (this *PdfReader) GetPageAsIndirectObject(pageNumber int) (PdfObject, error
return page, nil
}
// PageFromIndirectObject looks up the page whose indirect object is ind.
// It returns the matching PdfPage together with its 1-based page number.
// An error is returned when the reader's two page lists differ in length, or when
// no page in the list is backed by ind.
func (r *PdfReader) PageFromIndirectObject(ind *PdfIndirectObject) (*PdfPage, int, error) {
	// Both lists are indexed in parallel below, so they must be the same length.
	if len(r.pageList) != len(r.PageList) {
		return nil, 0, errors.New("page list invalid")
	}

	for idx := 0; idx < len(r.pageList); idx++ {
		if r.pageList[idx] != ind {
			continue
		}
		return r.PageList[idx], idx + 1, nil
	}

	return nil, 0, errors.New("Page not found")
}
// Get a page by the page number.
// Returns the PdfPage entry.
func (this *PdfReader) GetPage(pageNumber int) (*PdfPage, error) {


Loading…
Cancel
Save