Browse Source

JBIG2 Generic Encoder (#264)

* Prepared skeleton and basic component implementations for the jbig2 encoding.

* Added Bitset. Implemented Bitmap.

* Decoder with old Arithmetic Decoder

* Partly working arithmetic

* Working arithmetic decoder.

* MMR patched.

* rebuild to apache.

* Working generic

* Working generic

* Decoded full document

* Update Jenkinsfile go version [master] (#398)

* Update Jenkinsfile go version

* Decoded AnnexH document

* Minor issues fixed.

* Update README.md

* Fixed generic region errors. Added benchmark. Added bitmap unpadder. Added Bitmap toImage method.

* Fixed endofpage error

* Added integration test.

* Decoded all test files without errors. Implemented JBIG2Global.

* Merged with v3 version

* Fixed the EOF in the globals issue

* Fixed the JBIG2 ChocolateData Decode

* JBIG2 Added license information

* Minor fix in jbig2 encoding.

* Applied the logging convention

* Cleaned unnecessary imports

* Go modules clear unused imports

* checked out the README.md

* Moved trace to Debug. Fixed the build integrate tag in the document_decode_test.go

* Initial encoder skeleton

* Applied UniPDF Developer Guide. Fixed lint issues.

* Cleared documentation, fixed style issues.

* Added jbig2 doc.go files. Applied unipdf guide style.

* Minor code style changes.

* Minor naming and style issues fixes.

* Minor naming changes. Style issues fixed.

* Review r11 fixes.

* Added JBIG2 Encoder skeleton.

* Moved Document and Page to jbig2/document package. Created decoder package responsible for decoding jbig2 stream.

* Implemented raster functions.

* Added raster uni low test functions.

* Added raster low test functions

* untracked files on jbig2-encoder: c869089 Added raster low test functions

* index on jbig2-encoder: c869089 Added raster low test functions

* Added morph files.

* implemented jbig2 encoder basics

* JBIG2 Encoder - Generic method

* Added jbig2 image encode tests, black/white image tests

* cleaned and tested jbig2 package

* unfinished jbig2 classified encoder

* jbig2 minor style changes

* minor jbig2 encoder changes

* prepared JBIG2 Encoder

* Style and lint fixes

* Minor changes and lints

* Fixed shift unsigned value build errors

* Minor naming change

* Added jbig2 encode, image gondels. Fixed jbig2 decode bug.

* Provided jbig2 core.DecodeGlobals function.

* Fixed JBIG2Encoder `r6` revision issues.

* Removed public JBIG2Encoder document.

* Minor style changes

* added NewJBIG2Encoder function.

* fixed JBIG2Encoder 'r9' revision issues.

* Cleared 'r9' commented code.

* Updated ACKNOWLEDGEMENTS. Fixed JBIG2Encoder 'r10' revision issues.

Co-authored-by: Gunnsteinn Hall <gunnsteinn.hall@gmail.com>
master
Jacek Kucharczyk 1 year ago
committed by GitHub
parent
commit
c582323a8f
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
128 changed files with 24268 additions and 2496 deletions
  1. +21
    -1
      ACKNOWLEDGEMENTS.md
  2. BIN
      checkerboard-squares-black-white.jb2
  3. +94
    -5
      common/logging.go
  4. +0
    -175
      core/encoding.go
  5. +436
    -0
      core/encoding_jbig2.go
  6. +52
    -0
      core/encoding_jbig2_test.go
  7. +1
    -1
      core/stream.go
  8. BIN
      core/testdata/test.png
  9. +2
    -0
      go.mod
  10. +8
    -0
      internal/endian/doc.go
  11. +39
    -0
      internal/endian/endianness.go
  12. +7
    -0
      internal/jbig2/basic/doc.go
  13. +41
    -0
      internal/jbig2/basic/hash.go
  14. +48
    -0
      internal/jbig2/basic/math.go
  15. +97
    -0
      internal/jbig2/basic/nums.go
  16. +58
    -0
      internal/jbig2/basic/stack.go
  17. +342
    -0
      internal/jbig2/bitmap/bin-expand.go
  18. +718
    -0
      internal/jbig2/bitmap/bin-expand_test.go
  19. +360
    -0
      internal/jbig2/bitmap/bin-reduce.go
  20. +177
    -0
      internal/jbig2/bitmap/bin-reduce_test.go
  21. +1007
    -41
      internal/jbig2/bitmap/bitmap.go
  22. +940
    -17
      internal/jbig2/bitmap/bitmap_test.go
  23. +3
    -1
      internal/jbig2/bitmap/blit.go
  24. +212
    -0
      internal/jbig2/bitmap/boxes.go
  25. +44
    -0
      internal/jbig2/bitmap/boxes_test.go
  26. +17
    -0
      internal/jbig2/bitmap/color.go
  27. +14
    -3
      internal/jbig2/bitmap/combine.go
  28. +353
    -0
      internal/jbig2/bitmap/components.go
  29. +1395
    -0
      internal/jbig2/bitmap/components_test.go
  30. +530
    -0
      internal/jbig2/bitmap/correlation.go
  31. +332
    -0
      internal/jbig2/bitmap/correlation_test.go
  32. +3
    -0
      internal/jbig2/bitmap/doc.go
  33. +59
    -0
      internal/jbig2/bitmap/location.go
  34. +761
    -0
      internal/jbig2/bitmap/morph.go
  35. +1473
    -0
      internal/jbig2/bitmap/morph_test.go
  36. +0
    -5
      internal/jbig2/bitmap/operators.go
  37. +242
    -0
      internal/jbig2/bitmap/points.go
  38. +109
    -0
      internal/jbig2/bitmap/points_test.go
  39. +1722
    -0
      internal/jbig2/bitmap/raster.go
  40. +1622
    -0
      internal/jbig2/bitmap/raster_test.go
  41. +203
    -0
      internal/jbig2/bitmap/rgbtobw.go
  42. +656
    -0
      internal/jbig2/bitmap/seed-fill.go
  43. +384
    -0
      internal/jbig2/bitmap/seed-fill_test.go
  44. +66
    -0
      internal/jbig2/bitmap/selection.go
  45. +442
    -0
      internal/jbig2/bitmap/slices.go
  46. +211
    -0
      internal/jbig2/bitmap/slices_test.go
  47. +492
    -0
      internal/jbig2/bitmap/testhelpers.go
  48. +47
    -0
      internal/jbig2/decode.go
  49. +2
    -36
      internal/jbig2/decoder/arithmetic/arithmetic.go
  50. +2
    -1
      internal/jbig2/decoder/arithmetic/arithmetic_test.go
  51. +131
    -0
      internal/jbig2/decoder/decoder.go
  52. +7
    -0
      internal/jbig2/decoder/doc.go
  53. +1
    -1
      internal/jbig2/decoder/huffman/encoded_table.go
  54. +7
    -3
      internal/jbig2/decoder/huffman/node.go
  55. +1
    -10
      internal/jbig2/decoder/huffman/table.go
  56. +0
    -11
      internal/jbig2/decoder/mmr/code.go
  57. +0
    -18
      internal/jbig2/decoder/mmr/const.go
  58. +6
    -2
      internal/jbig2/decoder/mmr/mmr.go
  59. +1
    -1
      internal/jbig2/doc.go
  60. +0
    -290
      internal/jbig2/document.go
  61. +8
    -0
      internal/jbig2/document/doc.go
  62. +804
    -0
      internal/jbig2/document/document.go
  63. +257
    -137
      internal/jbig2/document/document_test.go
  64. +81
    -0
      internal/jbig2/document/globals.go
  65. +537
    -0
      internal/jbig2/document/page.go
  66. +0
    -0
      internal/jbig2/document/segments/doc.go
  67. +0
    -0
      internal/jbig2/document/segments/eos.go
  68. +9
    -15
      internal/jbig2/document/segments/generic-refinement-region.go
  69. +5
    -0
      internal/jbig2/document/segments/generic-refinement-region_test.go
  70. +214
    -64
      internal/jbig2/document/segments/generic-region.go
  71. +201
    -0
      internal/jbig2/document/segments/generic-region_test.go
  72. +2
    -6
      internal/jbig2/document/segments/halftone-segment.go
  73. +664
    -0
      internal/jbig2/document/segments/header.go
  74. +263
    -0
      internal/jbig2/document/segments/header_test.go
  75. +15
    -5
      internal/jbig2/document/segments/interfaces.go
  76. +0
    -0
      internal/jbig2/document/segments/kind.go
  77. +146
    -12
      internal/jbig2/document/segments/page-information.go
  78. +94
    -0
      internal/jbig2/document/segments/page-information_test.go
  79. +0
    -0
      internal/jbig2/document/segments/pattern-dictionary.go
  80. +0
    -0
      internal/jbig2/document/segments/pattern-dictionary_test.go
  81. +169
    -0
      internal/jbig2/document/segments/region.go
  82. +50
    -0
      internal/jbig2/document/segments/region_test.go
  83. +1
    -1
      internal/jbig2/document/segments/sequence.go
  84. +8
    -9
      internal/jbig2/document/segments/structs_test.go
  85. +483
    -161
      internal/jbig2/document/segments/symbol-dictionary.go
  86. +23
    -23
      internal/jbig2/document/segments/symbol-dictionary_test.go
  87. +0
    -0
      internal/jbig2/document/segments/table_segment.go
  88. +495
    -269
      internal/jbig2/document/segments/text-region.go
  89. +77
    -0
      internal/jbig2/encoder/arithmetic/class.go
  90. +33
    -0
      internal/jbig2/encoder/arithmetic/common.go
  91. +26
    -0
      internal/jbig2/encoder/arithmetic/context.go
  92. +2
    -2
      internal/jbig2/encoder/arithmetic/doc.go
  93. +635
    -0
      internal/jbig2/encoder/arithmetic/encoder.go
  94. +116
    -0
      internal/jbig2/encoder/arithmetic/encoder_test.go
  95. +70
    -0
      internal/jbig2/encoder/arithmetic/state.go
  96. +268
    -0
      internal/jbig2/encoder/classer/classer.go
  97. +121
    -0
      internal/jbig2/encoder/classer/common.go
  98. +233
    -0
      internal/jbig2/encoder/classer/correlation.go
  99. +8
    -0
      internal/jbig2/encoder/classer/doc.go
  100. +278
    -0
      internal/jbig2/encoder/classer/hausdorff.go

+ 21
- 1
ACKNOWLEDGEMENTS.md View File

@ -112,4 +112,24 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
```
* [Apache Java PDFBox JBIG2 Decoder](https://github.com/apache/pdfbox-jbig2), Apache License 2.0.
- Used as a base for the JBIG2 image decoder.
In order to achieve full support for the JBIG2 Decoder, it was necessary to implement all possible decoding
combinations defined in the JBIG2 standard, aka ITU T.88 and ISO/IEC 14492.
Given the lack of an open source Golang JBIG2 package, we decided that it would be best to base our own implementation
on some solid and reliable library.
The Apache PDFBox JBIG2 library fulfilled all our requirements. It has a really good quality of the code along with
the detailed comments on each function and class. It also implemented MMR, Huffman tables and arithmetic
decompressors along with all JBIG2 segments.
* [AGL JBIG2 Encoder](https://github.com/agl/jbig2enc), Apache License 2.0.
The complexity and lack of comprehensive documentation for the JBIG2 encoding process led us to look at the
AGL JBIG2 Encoder library. At the moment of implementing our encoder it was the only Open Source JBIG2 encoder.
It’s a C++ based library that implements both lossless and lossy encoding methods, where most of the image
operations are done using DanBloomberg Leptonica library.
The core encoding processes in the UniPDF JBIG2 Encoder were based on that well documented and solid library.
* [DanBloomberg Leptonica](https://github.com/DanBloomberg/leptonica), The 2-Clause BSD License.
DanBloomberg Leptonica is an amazing C/C++ Open Source library. It provides raster operations, binary expansion and
reduction, JBIG2 component creators, correlation scoring and a lot more perfectly commented image operation functions.
That library was used as a very solid base for our image operation algorithms used by the JBIG2 Encoder.

BIN
checkerboard-squares-black-white.jb2 View File


+ 94
- 5
common/logging.go View File

@ -7,11 +7,13 @@ package common
import (
"fmt"
"io"
"os"
"path/filepath"
"runtime"
)
// Logger is the interface used for logging in the unipdf package.
type Logger interface {
Error(format string, args ...interface{})
Warning(format string, args ...interface{})
@ -57,6 +59,8 @@ func (DummyLogger) IsLogLevel(level LogLevel) bool {
// LogLevel is the verbosity level for logging.
type LogLevel int
// Defines log level enum where the most important logs have the lowest values.
// I.e. level error = 0 and level trace = 5
const (
LogLevelTrace LogLevel = 5
LogLevelDebug LogLevel = 4
@ -66,14 +70,14 @@ const (
LogLevelError LogLevel = 0
)
// ConsoleLogger is a logger that writes logs to the 'os.Stdout'
type ConsoleLogger struct {
LogLevel LogLevel
}
// NewConsoleLogger creates new console logger.
func NewConsoleLogger(logLevel LogLevel) *ConsoleLogger {
logger := ConsoleLogger{}
logger.LogLevel = logLevel
return &logger
return &ConsoleLogger{LogLevel: logLevel}
}
// IsLogLevel returns true if log level is greater or equal than `level`.
@ -82,6 +86,7 @@ func (l ConsoleLogger) IsLogLevel(level LogLevel) bool {
return l.LogLevel >= level
}
// Error logs error message.
func (l ConsoleLogger) Error(format string, args ...interface{}) {
if l.LogLevel >= LogLevelError {
prefix := "[ERROR] "
@ -89,6 +94,7 @@ func (l ConsoleLogger) Error(format string, args ...interface{}) {
}
}
// Warning logs warning message.
func (l ConsoleLogger) Warning(format string, args ...interface{}) {
if l.LogLevel >= LogLevelWarning {
prefix := "[WARNING] "
@ -96,6 +102,7 @@ func (l ConsoleLogger) Warning(format string, args ...interface{}) {
}
}
// Notice logs notice message.
func (l ConsoleLogger) Notice(format string, args ...interface{}) {
if l.LogLevel >= LogLevelNotice {
prefix := "[NOTICE] "
@ -103,6 +110,7 @@ func (l ConsoleLogger) Notice(format string, args ...interface{}) {
}
}
// Info logs info message.
func (l ConsoleLogger) Info(format string, args ...interface{}) {
if l.LogLevel >= LogLevelInfo {
prefix := "[INFO] "
@ -110,6 +118,7 @@ func (l ConsoleLogger) Info(format string, args ...interface{}) {
}
}
// Debug logs debug message.
func (l ConsoleLogger) Debug(format string, args ...interface{}) {
if l.LogLevel >= LogLevelDebug {
prefix := "[DEBUG] "
@ -117,6 +126,7 @@ func (l ConsoleLogger) Debug(format string, args ...interface{}) {
}
}
// Trace logs trace message.
func (l ConsoleLogger) Trace(format string, args ...interface{}) {
if l.LogLevel >= LogLevelTrace {
prefix := "[TRACE] "
@ -124,14 +134,93 @@ func (l ConsoleLogger) Trace(format string, args ...interface{}) {
}
}
// output writes the `format`, `args` log message to `f`. It forwards to the package-level
// logToWriter helper, which prefixes the message with the source file name, line and `prefix`.
// NOTE(review): logToWriter looks up the call site with runtime.Caller(2); confirm that this depth
// still resolves to the user's code now that this wrapper adds one more stack frame.
func (l ConsoleLogger) output(f io.Writer, prefix string, format string, args ...interface{}) {
logToWriter(f, prefix, format, args...)
}
// Log is the package-level logger. It defaults to DummyLogger and can be replaced with SetLogger.
var Log Logger = DummyLogger{}
// SetLogger sets 'logger' as the logger used by the unidoc unipdf library.
// It replaces the package-level Log variable.
func SetLogger(logger Logger) {
Log = logger
}
// output writes `format`, `args` log message prefixed by the source file name, line and `prefix`
func (l ConsoleLogger) output(f *os.File, prefix string, format string, args ...interface{}) {
// WriterLogger is a logger that writes its log messages to the Output writer.
type WriterLogger struct {
// LogLevel is the verbosity level; a message is written only when LogLevel is greater than or equal to its level.
LogLevel LogLevel
// Output is the writer that receives the log messages.
Output io.Writer
}
// NewWriterLogger creates a new WriterLogger that logs messages up to the 'logLevel' verbosity
// into the provided 'writer'.
func NewWriterLogger(logLevel LogLevel, writer io.Writer) *WriterLogger {
	return &WriterLogger{LogLevel: logLevel, Output: writer}
}
// IsLogLevel reports whether the logger verbosity is greater than or equal to `level`.
// It can be used to skip resource intensive preparation of log messages that would not be written.
func (l WriterLogger) IsLogLevel(level LogLevel) bool {
	enabled := level <= l.LogLevel
	return enabled
}
// Error writes an error message to the Output writer when the logger verbosity is at least LogLevelError.
func (l WriterLogger) Error(format string, args ...interface{}) {
	if l.LogLevel < LogLevelError {
		return
	}
	l.logToWriter(l.Output, "[ERROR] ", format, args...)
}
// Warning writes a warning message to the Output writer when the logger verbosity is at least LogLevelWarning.
func (l WriterLogger) Warning(format string, args ...interface{}) {
	if l.LogLevel < LogLevelWarning {
		return
	}
	l.logToWriter(l.Output, "[WARNING] ", format, args...)
}
// Notice writes a notice message to the Output writer when the logger verbosity is at least LogLevelNotice.
func (l WriterLogger) Notice(format string, args ...interface{}) {
	if l.LogLevel < LogLevelNotice {
		return
	}
	l.logToWriter(l.Output, "[NOTICE] ", format, args...)
}
// Info writes an info message to the Output writer when the logger verbosity is at least LogLevelInfo.
func (l WriterLogger) Info(format string, args ...interface{}) {
	if l.LogLevel < LogLevelInfo {
		return
	}
	l.logToWriter(l.Output, "[INFO] ", format, args...)
}
// Debug writes a debug message to the Output writer when the logger verbosity is at least LogLevelDebug.
func (l WriterLogger) Debug(format string, args ...interface{}) {
	if l.LogLevel < LogLevelDebug {
		return
	}
	l.logToWriter(l.Output, "[DEBUG] ", format, args...)
}
// Trace writes a trace message to the Output writer when the logger verbosity is at least LogLevelTrace.
func (l WriterLogger) Trace(format string, args ...interface{}) {
	if l.LogLevel < LogLevelTrace {
		return
	}
	l.logToWriter(l.Output, "[TRACE] ", format, args...)
}
// logToWriter writes the `format`, `args` log message to `f` through the package-level logToWriter
// helper, which prefixes it with the source file name, line and `prefix`.
// The variadic `args` are expanded with `args...`; passing the slice itself would hand the formatter
// a single []interface{} operand instead of the individual values.
// NOTE(review): the package-level helper resolves the call site with runtime.Caller(2); confirm that
// this depth still points at the user's code now that this method adds one more stack frame.
func (l WriterLogger) logToWriter(f io.Writer, prefix string, format string, args ...interface{}) {
	logToWriter(f, prefix, format, args...)
}
func logToWriter(f io.Writer, prefix string, format string, args ...interface{}) {
_, file, line, ok := runtime.Caller(2)
if !ok {
file = "???"


+ 0
- 175
core/encoding.go View File

@ -36,7 +36,6 @@ import (
"github.com/unidoc/unipdf/v3/common"
"github.com/unidoc/unipdf/v3/internal/ccittfax"
"github.com/unidoc/unipdf/v3/internal/jbig2"
)
// Stream encoding filter names.
@ -1982,180 +1981,6 @@ func (enc *CCITTFaxEncoder) EncodeBytes(data []byte) ([]byte, error) {
return encoder.Encode(pixels), nil
}
// JBIG2Encoder is the jbig2 image encoder (WIP)/decoder.
type JBIG2Encoder struct {
// Globals are the JBIG2 global segments.
Globals jbig2.Globals
// IsChocolateData defines if the data is encoded such that
// binary data '1' means black and '0' white.
// otherwise the data is called vanilla.
// Naming convention taken from: 'https://en.wikipedia.org/wiki/Binary_image#Interpretation'
IsChocolateData bool
}
// NewJBIG2Encoder returns a new instance of JBIG2Encoder.
func NewJBIG2Encoder() *JBIG2Encoder {
return &JBIG2Encoder{}
}
// setChocolateData sets the chocolate data flag when the pdf stream object contains the 'Decode' object.
// Decode object ( PDF32000:2008 7.10.2 Type 0 (Sampled) Functions).
// NOTE: this function is a temporary helper until the samples handle Decode function.
func (enc *JBIG2Encoder) setChocolateData(decode PdfObject) {
arr, ok := decode.(*PdfObjectArray)
if !ok {
common.Log.Debug("JBIG2Encoder - Decode is not an array. %T", decode)
return
}
// (PDF32000:2008 Table 39) The array should be of 2 x n size.
// For binary images n stands for 1bit, thus the array should contain 2 numbers.
vals, err := arr.GetAsFloat64Slice()
if err != nil {
common.Log.Debug("JBIG2Encoder unsupported Decode value. %s", arr.String())
return
}
if len(vals) != 2 {
return
}
first, second := int(vals[0]), int(vals[1])
if first == 1 && second == 0 {
enc.IsChocolateData = true
} else if first == 0 && second == 1 {
enc.IsChocolateData = false
} else {
common.Log.Debug("JBIG2Encoder unsupported DecodeParams->Decode value: %s", arr.String())
}
}
func newJBIG2EncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfObjectDictionary) (*JBIG2Encoder, error) {
encoder := NewJBIG2Encoder()
encDict := streamObj.PdfObjectDictionary
if encDict == nil {
// No encoding dictionary.
return encoder, nil
}
// If decodeParams not provided, see if we can get from the stream.
if decodeParams == nil {
obj := encDict.Get("DecodeParms")
if obj != nil {
switch t := obj.(type) {
case *PdfObjectDictionary:
decodeParams = t
break
case *PdfObjectArray:
if t.Len() == 1 {
if dp, ok := GetDict(t.Get(0)); ok {
decodeParams = dp
}
}
default:
common.Log.Error("DecodeParams not a dictionary %#v", obj)
return nil, errors.New("invalid DecodeParms")
}
}
}
if decodeParams != nil {
if globals := decodeParams.Get("JBIG2Globals"); globals != nil {
globalsStream, ok := GetStream(globals)
if !ok {
err := errors.New("the Globals stream should be an Object Stream")
common.Log.Debug("ERROR: %s", err.Error())
return nil, err
}
gdoc, err := jbig2.NewDocument(globalsStream.Stream)
if err != nil {
err = fmt.Errorf("decoding global stream failed. %s", err.Error())
common.Log.Debug("ERROR: %s", err)
return nil, err
}
encoder.Globals = gdoc.GlobalSegments
}
}
// Inverse the bits on the 'Decode [1.0 0.0]' function (PDF32000:2008 7.10.2)
if decode := streamObj.Get("Decode"); decode != nil {
encoder.setChocolateData(decode)
}
return encoder, nil
}
// GetFilterName returns the name of the encoding filter.
func (enc *JBIG2Encoder) GetFilterName() string {
return StreamEncodingFilterNameJBIG2
}
// MakeDecodeParams makes a new instance of an encoding dictionary based on the current encoder settings.
func (enc *JBIG2Encoder) MakeDecodeParams() PdfObject {
return MakeDict()
}
// MakeStreamDict makes a new instance of an encoding dictionary for a stream object.
func (enc *JBIG2Encoder) MakeStreamDict() *PdfObjectDictionary {
dict := MakeDict()
if enc.IsChocolateData {
// /Decode[1.0 0.0] - see note in the 'setChocolateData' method.
dict.Set("Decode", MakeArray(MakeFloat(1.0), MakeFloat(0.0)))
}
dict.Set("Filter", MakeName(enc.GetFilterName()))
return dict
}
// UpdateParams updates the parameter values of the encoder.
func (enc *JBIG2Encoder) UpdateParams(params *PdfObjectDictionary) {
if decode := params.Get("Decode"); decode != nil {
enc.setChocolateData(decode)
}
}
// DecodeBytes decodes a slice of JBIG2 encoded bytes and returns the results.
func (enc *JBIG2Encoder) DecodeBytes(encoded []byte) ([]byte, error) {
// create new JBIG2 document.
doc, err := jbig2.NewDocumentWithGlobals(encoded, enc.Globals)
if err != nil {
return nil, err
}
// the jbig2 PDF document should have only one page, where page numeration
// starts from '1'.
page, err := doc.GetPage(1)
if err != nil {
return nil, err
}
if page == nil {
err = errors.New("jbig2 corrupted data. Page#1 not found")
common.Log.Debug("ERROR: %s", err.Error())
return nil, err
}
// Get the page bitmap data.
bm, err := page.GetBitmap()
if err != nil {
return nil, err
}
bm.GetVanillaData()
// By default the bitmap data contains the rowstride padding.
// In order to get rid of the rowstride padding use the bitmap.GetUnpaddedData method.
return bm.GetUnpaddedData()
}
// DecodeStream decodes a JBIG2 encoded stream and returns the result as a slice of bytes.
func (enc *JBIG2Encoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) {
return enc.DecodeBytes(streamObj.Stream)
}
// EncodeBytes encodes the passed slice in slice of bytes into JBIG2.
func (enc *JBIG2Encoder) EncodeBytes(data []byte) ([]byte, error) {
common.Log.Debug("Error: Attempting to use unsupported encoding %s", enc.GetFilterName())
return data, ErrNoJBIG2Decode
}
// JPXEncoder implements JPX encoder/decoder (dummy, for now)
// FIXME: implement
type JPXEncoder struct{}


+ 436
- 0
core/encoding_jbig2.go View File

@ -0,0 +1,436 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package core
import (
"bytes"
"image"
"image/color"
"github.com/unidoc/unipdf/v3/common"
"github.com/unidoc/unipdf/v3/internal/jbig2"
"github.com/unidoc/unipdf/v3/internal/jbig2/bitmap"
"github.com/unidoc/unipdf/v3/internal/jbig2/decoder"
"github.com/unidoc/unipdf/v3/internal/jbig2/document"
"github.com/unidoc/unipdf/v3/internal/jbig2/errors"
)
// JBIG2CompressionType defines the enum of compression types used by the JBIG2Encoder.
type JBIG2CompressionType int
const (
// JB2Generic is the JBIG2 compression type that uses the generic region, see 6.2.
JB2Generic JBIG2CompressionType = iota
// JB2SymbolCorrelation is the JBIG2 compression type that uses the symbol dictionary and text region encoding
// procedure with the correlation classification.
// NOT IMPLEMENTED YET.
JB2SymbolCorrelation
// JB2SymbolRankHaus is the JBIG2 compression type that uses the symbol dictionary and text region encoding
// procedure with the rank Hausdorff classification of the input images.
// It is more robust, but more susceptible to confusing components that should be in different classes.
// NOT IMPLEMENTED YET.
JB2SymbolRankHaus
)
// JB2ImageAutoThreshold is the const value passed to the 'GoImageToJBIG2' function in order to set the
// auto threshold for the histogram.
const JB2ImageAutoThreshold = -1.0
//
// JBIG2Encoder/Decoder
//
// JBIG2Encoder implements both the jbig2 encoder and the decoder. The encoder allows to encode
// provided images (best used with document scans) in multiple ways. By default it uses the single page generic
// encoder, which stores lossless data as a single segment.
// In order to store multiple image pages use the 'FileMode', which allows to store more pages within a single jbig2 document.
// WIP: In order to obtain better compression results the encoder would allow to encode the input in a
// lossy or lossless way with a component (symbol) mode. It divides the image into components,
// then checks if any component is 'similar' to the others and maps them together. The symbol classes are stored
// in the dictionary. Then the encoder creates text regions which use the related symbol classes to fill their space.
// The similarity is defined by the 'Threshold' variable (default: 0.95). The lower the value is, the more components
// match a single class, thus the compression is better, but the result might become lossy.
type JBIG2Encoder struct {
// d is the jbig2 document assembled for encoding. AddPageImage creates it when it is nil.
d *document.Document
// Globals are the JBIG2 global segments.
Globals jbig2.Globals
// IsChocolateData defines if the data is encoded such that
// binary data '1' means black and '0' white.
// Otherwise the data is called vanilla.
// Naming convention taken from: 'https://en.wikipedia.org/wiki/Binary_image#Interpretation'
IsChocolateData bool
// DefaultPageSettings are the settings parameters used by the jbig2 encoder.
DefaultPageSettings JBIG2EncoderSettings
}
// NewJBIG2Encoder returns a pointer to a new, zero-valued JBIG2Encoder.
func NewJBIG2Encoder() *JBIG2Encoder {
	return new(JBIG2Encoder)
}
// AddPageImage adds a page with the image 'img' to the encoder context so that it gets encoded into the jbig2 document.
// The 'settings' define what encoding type should be used for the page. If 'settings' is nil the encoder's
// DefaultPageSettings are used.
// The inputs are checked before the encoder state is modified: the internal document is created only when
// the page is about to be added, so a rejected call does not leave an empty document behind.
// Returns an error if the encoder or the image is nil, the settings are invalid, the image cannot be converted
// into a bitmap, the compression type is not implemented or adding the page fails.
func (enc *JBIG2Encoder) AddPageImage(img *JBIG2Image, settings *JBIG2EncoderSettings) (err error) {
	const processName = "JBIG2Document.AddPageImage"
	if enc == nil {
		return errors.Error(processName, "JBIG2Document is nil")
	}
	// a nil image would otherwise be dereferenced during the bitmap conversion.
	if img == nil {
		return errors.Error(processName, "input image is nil")
	}
	if settings == nil {
		settings = &enc.DefaultPageSettings
	}
	if err = settings.Validate(); err != nil {
		return errors.Wrap(err, processName, "")
	}

	// convert input 'img' to the bitmap.Bitmap
	b, err := img.toBitmap()
	if err != nil {
		return errors.Wrap(err, processName, "")
	}

	switch settings.Compression {
	case JB2Generic:
		// create the document lazily, only once all the inputs were accepted.
		if enc.d == nil {
			enc.d = document.InitEncodeDocument(settings.FileMode)
		}
		if err = enc.d.AddGenericPage(b, settings.DuplicatedLinesRemoval); err != nil {
			return errors.Wrap(err, processName, "")
		}
	case JB2SymbolCorrelation:
		return errors.Error(processName, "symbol correlation encoding not implemented yet")
	case JB2SymbolRankHaus:
		return errors.Error(processName, "symbol rank haus encoding not implemented yet")
	default:
		return errors.Error(processName, "provided invalid compression")
	}
	return nil
}
// DecodeBytes decodes the JBIG2 'encoded' byte slice using the encoder's Globals and returns the result.
// The decoder is asked for unpadded data.
func (enc *JBIG2Encoder) DecodeBytes(encoded []byte) ([]byte, error) {
	return jbig2.DecodeBytes(encoded, decoder.Parameters{UnpaddedData: true}, enc.Globals)
}
// DecodeGlobals decodes the 'encoded' byte stream and returns its globally defined segments ('Globals').
func (enc *JBIG2Encoder) DecodeGlobals(encoded []byte) (jbig2.Globals, error) {
	globals, err := jbig2.DecodeGlobals(encoded)
	return globals, err
}
// DecodeImages decodes the page images from the jbig2 'encoded' data input.
// A jbig2 document may contain multiple pages, thus the function can return multiple images.
// The order of the images corresponds to the page numbers, starting from page 1.
func (enc *JBIG2Encoder) DecodeImages(encoded []byte) ([]image.Image, error) {
	const processName = "JBIG2Encoder.DecodeImages"

	// decode the document structure first.
	doc, err := decoder.Decode(encoded, decoder.Parameters{UnpaddedData: true}, enc.Globals.ToDocumentGlobals())
	if err != nil {
		return nil, errors.Wrap(err, processName, "")
	}

	// find out how many pages the document has.
	pageCount, err := doc.PageNumber()
	if err != nil {
		return nil, errors.Wrap(err, processName, "")
	}

	// decode the image of each page.
	images := make([]image.Image, 0)
	for page := 1; page <= pageCount; page++ {
		pageImage, decodeErr := doc.DecodePageImage(page)
		if decodeErr != nil {
			return nil, errors.Wrapf(decodeErr, processName, "page: '%d'", page)
		}
		images = append(images, pageImage)
	}
	return images, nil
}
// DecodeStream decodes the JBIG2 encoded content of the 'streamObj' and returns the result as a slice of bytes.
func (enc *JBIG2Encoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) {
	encoded := streamObj.Stream
	return enc.DecodeBytes(encoded)
}
// EncodeBytes encodes a slice of bytes into the JBIG2 encoding format.
// The input 'data' must be an encoded image. The caller is responsible for loading the matching
// image codec ('png', 'jpg') so that the data can be decoded.
// Returns the byte slice of the encoded jbig2 document. The image is encoded using the DefaultPageSettings.
func (enc *JBIG2Encoder) EncodeBytes(data []byte) ([]byte, error) {
	const processName = "JBIG2Encoder.EncodeBytes"
	if len(data) == 0 {
		return nil, errors.Errorf(processName, "input 'data' not defined")
	}

	img, _, decodeErr := image.Decode(bytes.NewReader(data))
	if decodeErr != nil {
		return nil, errors.Wrap(decodeErr, processName, "decode input image")
	}

	encoded, encodeErr := enc.encodeImage(img)
	if encodeErr != nil {
		return nil, errors.Wrap(encodeErr, processName, "")
	}
	return encoded, nil
}
// EncodeImage encodes the golang image.Image 'img' into a jbig2 encoded document byte slice
// using the default encoder settings.
func (enc *JBIG2Encoder) EncodeImage(img image.Image) ([]byte, error) {
	encoded, err := enc.encodeImage(img)
	return encoded, err
}
// Encode encodes the previously prepared jbig2 document and returns it as a byte slice.
// Returns an error if no document was prepared or the document encoding fails.
func (enc *JBIG2Encoder) Encode() (data []byte, err error) {
	const processName = "JBIG2Document.Encode"
	doc := enc.d
	if doc == nil {
		return nil, errors.Errorf(processName, "document input data not defined")
	}
	// the full headers flag follows the file mode of the default page settings.
	doc.FullHeaders = enc.DefaultPageSettings.FileMode

	// encode the document
	if data, err = doc.Encode(); err != nil {
		return nil, errors.Wrap(err, processName, "")
	}
	return data, nil
}
// GetFilterName returns the name of the encoding filter, which is StreamEncodingFilterNameJBIG2.
func (enc *JBIG2Encoder) GetFilterName() string {
return StreamEncodingFilterNameJBIG2
}
// MakeDecodeParams makes a new instance of an encoding dictionary based on the current encoder settings.
// It returns the result of MakeDict without setting any entries on it.
func (enc *JBIG2Encoder) MakeDecodeParams() PdfObject {
return MakeDict()
}
// MakeStreamDict makes a new instance of an encoding dictionary for a stream object.
// The only entry it sets is 'Filter', holding the name of the encoding filter.
func (enc *JBIG2Encoder) MakeStreamDict() *PdfObjectDictionary {
	streamDict := MakeDict()
	filterName := MakeName(enc.GetFilterName())
	streamDict.Set("Filter", filterName)
	return streamDict
}
// UpdateParams updates the parameter values of the encoder.
// The body of this method is empty, so 'params' is ignored; the method is required to implement
// the StreamEncoder interface.
func (enc *JBIG2Encoder) UpdateParams(params *PdfObjectDictionary) {
}
// encodeImage converts the golang image 'i' into a JBIG2Image using the auto threshold, adds it as a page
// with the DefaultPageSettings and returns the encoded document.
func (enc *JBIG2Encoder) encodeImage(i image.Image) ([]byte, error) {
	const processName = "encodeImage"

	// convert the input into jbig2 image
	binaryImage, convErr := GoImageToJBIG2(i, JB2ImageAutoThreshold)
	if convErr != nil {
		return nil, errors.Wrap(convErr, processName, "convert input image to jbig2 img")
	}

	addErr := enc.AddPageImage(binaryImage, &enc.DefaultPageSettings)
	if addErr != nil {
		return nil, errors.Wrap(addErr, processName, "")
	}
	return enc.Encode()
}
// newJBIG2DecoderFromStream creates a JBIG2Encoder used for decoding the 'streamObj'.
// If 'decodeParams' is nil the function tries to read them from the 'DecodeParms' entry of the stream dictionary,
// which may be a dictionary or a single element array containing a dictionary.
// When the decode parameters contain the 'JBIG2Globals' entry, its stream is decoded and stored in the encoder Globals.
// The globals entry is resolved with GetStream, so that both a direct stream object and an indirect
// reference to a stream are accepted.
// Returns an error if the 'DecodeParms' entry has an invalid type, the globals entry is not a stream or
// the globals stream data cannot be decoded.
func newJBIG2DecoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfObjectDictionary) (*JBIG2Encoder, error) {
	const processName = "newJBIG2DecoderFromStream"
	encoder := &JBIG2Encoder{}
	encDict := streamObj.PdfObjectDictionary
	if encDict == nil {
		// No encoding dictionary.
		return encoder, nil
	}

	// If decodeParams not provided, see if we can get from the stream.
	if decodeParams == nil {
		if obj := encDict.Get("DecodeParms"); obj != nil {
			switch t := obj.(type) {
			case *PdfObjectDictionary:
				decodeParams = t
			case *PdfObjectArray:
				if t.Len() == 1 {
					if dp, ok := GetDict(t.Get(0)); ok {
						decodeParams = dp
					}
				}
			default:
				common.Log.Error("DecodeParams not a dictionary %#v", obj)
				return nil, errors.Errorf(processName, "invalid DecodeParms type: %T", t)
			}
		}
	}
	if decodeParams == nil {
		return encoder, nil
	}

	globals := decodeParams.Get("JBIG2Globals")
	if globals == nil {
		return encoder, nil
	}

	// resolve the globals entry, which might be stored as an indirect reference to the stream.
	globalsStream, ok := GetStream(globals)
	if !ok {
		err := errors.Error(processName, "jbig2.Globals stream should be an Object Stream")
		common.Log.Debug("ERROR: %s", err.Error())
		return nil, err
	}

	decoded, err := jbig2.DecodeGlobals(globalsStream.Stream)
	if err != nil {
		err = errors.Wrap(err, processName, "corrupted jbig2 encoded data")
		common.Log.Debug("ERROR: %s", err)
		return nil, err
	}
	encoder.Globals = decoded
	return encoder, nil
}
//
// JBIG2Image
//
// JBIG2Image is the image structure used by the jbig2 encoder. Its Data must be stored with
// 1 bit per component and 1 component per pixel (1bpp). In order to create a binary image
// use the GoImageToJBIG2 function. If the rows of the image data are padded, set HasPadding to true.
type JBIG2Image struct {
// Width and Height define the image boundaries.
// Both must be non-zero for the image to be converted into a bitmap.
Width, Height int
// Data is the byte slice data for the input image.
// It must not be nil for the image to be converted into a bitmap.
Data []byte
// HasPadding is the attribute that defines if the last byte of the data in each row contains
// 0 bits padding.
HasPadding bool
}
// ToGoImage builds the golang image.Image out of the JBIG2Image.
// The image is first converted to a bitmap; an error from that conversion is returned wrapped.
func (j *JBIG2Image) ToGoImage() (image.Image, error) {
	const processName = "JBIG2Image.ToGoImage"

	bm, err := j.toBitmap()
	if err == nil {
		return bm.ToImage(), nil
	}
	return nil, errors.Wrap(err, processName, "")
}
// toBitmap converts the JBIG2Image into the bitmap.Bitmap.
// Returns an error if the image Data is nil, if the Width or Height is zero
// or if the bitmap can't be created from the provided data.
func (j *JBIG2Image) toBitmap() (b *bitmap.Bitmap, err error) {
	const processName = "JBIG2Image.toBitmap"

	switch {
	case j.Data == nil:
		return nil, errors.Error(processName, "image data not defined")
	case j.Width == 0 || j.Height == 0:
		return nil, errors.Error(processName, "image height or width not defined")
	}

	// The constructor depends on whether the rows are already padded.
	if !j.HasPadding {
		b, err = bitmap.NewWithUnpaddedData(j.Width, j.Height, j.Data)
	} else {
		b, err = bitmap.NewWithData(j.Width, j.Height, j.Data)
	}
	if err != nil {
		return nil, errors.Wrap(err, processName, "")
	}
	return b, nil
}
// GoImageToJBIG2 creates a binary (1bpp) JBIG2Image on the base of the golang image.Image 'i'.
// The 'bwThreshold' defines the black/white threshold and is accepted in the inclusive range [0.0, 1.0].
// It is scaled by 255 and passed, together with the image, to bitmap.ImgToBinary.
// If the 'bwThreshold' is equal to JB2ImageAutoThreshold, the image is converted to grayscale first
// and the threshold is computed from its histogram using the Triangle method. For more information go to:
// https://www.mathworks.com/matlabcentral/fileexchange/28047-gray-image-thresholding-using-the-triangle-method
// Returns an error if the image 'i' is nil or the threshold is out of the allowed range.
func GoImageToJBIG2(i image.Image, bwThreshold float64) (*JBIG2Image, error) {
	const processName = "GoImageToJBIG2"
	if i == nil {
		return nil, errors.Error(processName, "image 'i' not defined")
	}

	var level uint8
	switch {
	case bwThreshold == JB2ImageAutoThreshold:
		// Derive the threshold from the grayscale histogram (triangle method)
		// and continue with the grayscale version of the input.
		grayscale := bitmap.ImgToGray(i)
		level = bitmap.AutoThresholdTriangle(bitmap.GrayImageHistogram(grayscale))
		i = grayscale
	case bwThreshold > 1.0 || bwThreshold < 0.0:
		// Any other value outside of the allowed range is rejected.
		return nil, errors.Error(processName, "provided threshold is not in a range {0.0, 1.0}")
	default:
		level = uint8(255 * bwThreshold)
	}

	return bwToJBIG2Image(bitmap.ImgToBinary(i, level)), nil
}
// bwToJBIG2Image converts the black/white gray image 'i' into the JBIG2Image.
// Every pixel with the gray value 'Y' equal to 0 (black) is stored as a '1' bit, all the other
// pixels are left as '0' bits. The resulting Data is taken directly from the bitmap created
// for the image size, and the image is marked with HasPadding set to true.
func bwToJBIG2Image(i *image.Gray) *JBIG2Image {
	bounds := i.Bounds()
	width, height := bounds.Dx(), bounds.Dy()

	bm := bitmap.New(width, height)
	ji := &JBIG2Image{Height: height, Width: width, HasPadding: true}

	for y := 0; y < height; y++ {
		for x := 0; x < width; x++ {
			// GrayAt expects absolute image coordinates. The image bounds are not guaranteed
			// to start at (0, 0), and a point outside of the image rectangle is reported
			// as color.Gray{} (black), thus the lookup must be shifted by the bounds minimum.
			if i.GrayAt(bounds.Min.X+x, bounds.Min.Y+y).Y != 0 {
				// non black pixel - leave the '0' bit.
				continue
			}
			// the bitmap itself is always indexed from (0, 0).
			if err := bm.SetPixel(x, y, 1); err != nil {
				// log the failure reason rather than the whole bitmap content.
				common.Log.Debug("can't set pixel at bitmap: %v", err)
			}
		}
	}
	ji.Data = bm.Data
	return ji
}
// JBIG2EncoderSettings contains the parameters and settings used by the JBIG2Encoder.
// Current version works only on JB2Generic compression.
type JBIG2EncoderSettings struct {
// FileMode defines if the jbig2 encoder should return full jbig2 file instead of
// shortened pdf mode. This adds the file header to the jbig2 definition.
FileMode bool
// Compression is the setting that defines the compression type used for encoding the page.
Compression JBIG2CompressionType
// DuplicatedLinesRemoval codes the generic region in such a way that if the lines are duplicated
// the encoder doesn't store them twice.
DuplicatedLinesRemoval bool
// DefaultPixelValue is the initial bit value for every pixel in the page.
// Validate accepts only the values 0 and 1.
DefaultPixelValue uint8
// ResolutionX is an optional setting that defines the 'x' axis input image resolution - used for single page encoding.
// Validate rejects negative values.
ResolutionX int
// ResolutionY is an optional setting that defines the 'y' axis input image resolution - used for single page encoding.
// Validate rejects negative values.
ResolutionY int
// Threshold defines the threshold of the image correlation for
// non Generic compression.
// Used only for JB2SymbolCorrelation and JB2SymbolRankHaus methods.
// Best results in range [0.7 - 0.98] - the lower the value the better the compression would be,
// but the more lossy.
// Default value: 0.95
Threshold float64
}
// Validate checks the settings of the JBIG2 encoder and reports the first violated rule:
//  - Threshold must be within the range [0.0, 1.0],
//  - ResolutionX and ResolutionY must not be negative,
//  - DefaultPixelValue must be a single bit value: 0 or 1,
//  - Compression must be JB2Generic, as the other methods are not implemented yet.
// Returns nil if all the settings are valid.
func (s JBIG2EncoderSettings) Validate() error {
	const processName = "validateEncoder"
	switch {
	case s.Threshold < 0 || s.Threshold > 1.0:
		return errors.Errorf(processName, "provided threshold value: '%v' must be in range [0.0, 1.0]", s.Threshold)
	case s.ResolutionX < 0:
		return errors.Errorf(processName, "provided x resolution: '%d' must be positive or zero value", s.ResolutionX)
	case s.ResolutionY < 0:
		return errors.Errorf(processName, "provided y resolution: '%d' must be positive or zero value", s.ResolutionY)
	case s.DefaultPixelValue > 1:
		// the value is unsigned, thus anything above one is not a valid bit.
		return errors.Errorf(processName, "default pixel value: '%d' must be a value for the bit: {0,1}", s.DefaultPixelValue)
	case s.Compression != JB2Generic:
		return errors.Errorf(processName, "provided compression is not implemented yet")
	}
	return nil
}

+ 52
- 0
core/encoding_jbig2_test.go View File

@ -0,0 +1,52 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package core
import (
"image"
"image/color"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/unidoc/unipdf/v3/internal/jbig2/bitmap"
)
// TestImageToJBIG2Image tests the conversion of the image.Image into the JBIG2Image.
func TestImageToJBIG2Image(t *testing.T) {
	t.Run("BlackWhite", func(t *testing.T) {
		// The test image is a 50x50 white square surrounded by a black frame
		// which is 2 pixels wide. The expected bitmap is built along with the image.
		const size = 50
		g := image.NewGray(image.Rect(0, 0, size, size))
		bm := bitmap.New(size, size)

		for x := 0; x < size; x++ {
			for y := 0; y < size; y++ {
				onFrame := x < 2 || x > 47 || y < 2 || y > 47
				if !onFrame {
					// the interior stays white.
					g.SetGray(x, y, color.Gray{Y: 255})
					continue
				}
				// frame pixels are black in the image and '1' in the bitmap.
				g.SetGray(x, y, color.Gray{})
				assert.NoError(t, bm.SetPixel(x, y, 1))
			}
		}

		// convert the image and compare the result with the expected bitmap data.
		jb2, err := GoImageToJBIG2(g, JB2ImageAutoThreshold)
		require.NoError(t, err)
		assert.Equal(t, jb2.Data, bm.Data)
	})
}

+ 1
- 1
core/stream.go View File

@ -71,7 +71,7 @@ func NewEncoderFromStream(streamObj *PdfObjectStream) (StreamEncoder, error) {
case StreamEncodingFilterNameCCITTFax:
return newCCITTFaxEncoderFromStream(streamObj, nil)
case StreamEncodingFilterNameJBIG2:
return newJBIG2EncoderFromStream(streamObj, nil)
return newJBIG2DecoderFromStream(streamObj, nil)
case StreamEncodingFilterNameJPX:
return NewJPXEncoder(), nil
}


BIN
core/testdata/test.png View File

Before After
Width: 300  |  Height: 300  |  Size: 3.0 KiB

+ 2
- 0
go.mod View File

@ -1,5 +1,7 @@
module github.com/unidoc/unipdf/v3
go 1.11
require (
github.com/adrg/sysfont v0.1.0
github.com/boombuler/barcode v1.0.0


+ 8
- 0
internal/endian/doc.go View File

@ -0,0 +1,8 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
// Package endian detects the platform specific byte endianness. On initialization
// the package checks if the system is using big or little endian byte ordering.
package endian

+ 39
- 0
internal/endian/endianness.go View File

@ -0,0 +1,39 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package endian
import (
"encoding/binary"
"unsafe"
)
var (
// ByteOrder is the current system byte order.
// It is assigned by this package's init function.
ByteOrder binary.ByteOrder
// isBigEndian is set to true by the init function when the big endian byte order is detected.
isBigEndian bool
)
func init() {
const intSize = int(unsafe.Sizeof(0))
i := 1
byteSlice := (*[intSize]byte)(unsafe.Pointer(&i))
if byteSlice[0] == 0 {
isBigEndian = true
ByteOrder = binary.BigEndian
} else {
ByteOrder = binary.LittleEndian
}
}
// IsBig checks if the machine uses the Big Endian byte order.
// It reports the value detected once, during the package initialization.
func IsBig() bool {
return isBigEndian
}
// IsLittle checks if the machine uses the Little Endian byte order.
// It is the negation of the value detected once, during the package initialization.
func IsLittle() bool {
return !isBigEndian
}

+ 7
- 0
internal/jbig2/basic/doc.go View File

@ -0,0 +1,7 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
// Package basic contains common structures, slices and maps used within the jbig2 package.
package basic

+ 41
- 0
internal/jbig2/basic/hash.go View File

@ -0,0 +1,41 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package basic
// IntsMap is a wrapper over the map[uint64][]int.
// All the values added under the same 'key' are kept in a single slice, in the order of insertion.
type IntsMap map[uint64][]int

// Add appends the 'value' to the slice stored at the 'key'.
func (i IntsMap) Add(key uint64, value int) {
	values := i[key]
	i[key] = append(values, value)
}

// Get gets the first int value stored at the 'key'.
// Returns false if there is no value stored for given 'key'.
func (i IntsMap) Get(key uint64) (int, bool) {
	if values := i[key]; len(values) > 0 {
		return values[0], true
	}
	return 0, false
}

// GetSlice gets the whole int slice located at the 'key'.
// Returns false if the 'key' doesn't exist in the map.
func (i IntsMap) GetSlice(key uint64) ([]int, bool) {
	// a missing key results in a nil slice and 'false'.
	values, found := i[key]
	return values, found
}

// Delete deletes all the records stored at the 'key'.
func (i IntsMap) Delete(key uint64) {
	delete(i, key)
}

+ 48
- 0
internal/jbig2/basic/math.go View File

@ -0,0 +1,48 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package basic
// Abs gets the absolute value of the integer 'v'.
func Abs(v int) int {
	if v < 0 {
		return -v
	}
	return v
}
// Ceil gets the 'ceil' value of the 'numerator' divided by the 'denominator', that is the smallest
// integer which is greater than or equal to the exact quotient.
// The result is correct for any combination of the arguments signs.
// The function panics if the 'denominator' is zero, like any integer division by zero.
func Ceil(numerator, denominator int) int {
	quotient := numerator / denominator
	remainder := numerator % denominator
	// Integer division truncates toward zero, thus the quotient needs to be rounded up only
	// if the exact result is positive and not integral. That is the case when the remainder
	// is non zero and has the same sign as the denominator.
	if remainder != 0 && (remainder < 0) == (denominator < 0) {
		quotient++
	}
	return quotient
}
// Max gets the greater of the provided 'x' and 'y' arguments.
func Max(x, y int) int {
	if y >= x {
		return y
	}
	return x
}
// Min gets the smaller of the provided 'x' and 'y' arguments.
func Min(x, y int) int {
	if y <= x {
		return y
	}
	return x
}
// Sign gets the float32 sign of the 'v' value.
// It returns 1.0 if the value 'v' is greater than or equal to 0.0.
// For any other value, including NaN, it returns -1.0.
func Sign(v float32) float32 {
	// the negated comparison keeps NaN on the '-1.0' side.
	if !(v >= 0.0) {
		return -1.0
	}
	return 1.0
}

+ 97
- 0
internal/jbig2/basic/nums.go View File

@ -0,0 +1,97 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package basic
import (
"github.com/unidoc/unipdf/v3/internal/jbig2/errors"
)
// IntSlice is the integer slice that contains panic safe methods.
type IntSlice []int

// NewIntSlice creates a new integer slice which initially contains 'i' zero values.
func NewIntSlice(i int) *IntSlice {
	sl := IntSlice(make([]int, i))
	return &sl
}

// Add appends the integer 'v' at the end of the slice.
// Returns an error if the slice pointer is nil.
func (i *IntSlice) Add(v int) error {
	if i == nil {
		return errors.Error("IntSlice.Add", "slice not defined")
	}
	*i = append(*i, v)
	return nil
}

// Copy creates a copy of given int slice. The copy doesn't share the memory with the original.
func (i *IntSlice) Copy() *IntSlice {
	cp := IntSlice(make([]int, len(*i)))
	copy(cp, *i)
	return &cp
}

// Get gets the integer at 'index'.
// Returns an error if the 'index' is negative or greater than the index of the last element.
func (i IntSlice) Get(index int) (int, error) {
	// a negative index must be rejected as well, otherwise the lookup below would panic.
	if index < 0 || index > len(i)-1 {
		return 0, errors.Errorf("IntSlice.Get", "index: %d out of range", index)
	}
	return i[index], nil
}

// Size returns the number of the elements in the int slice.
func (i IntSlice) Size() int {
	return len(i)
}
// NumSlice is the slice of the float32 numbers that has a panic safe API.
type NumSlice []float32

// NewNumSlice creates a new NumSlice pointer. The slice initially contains 'i' zero values.
func NewNumSlice(i int) *NumSlice {
	values := make(NumSlice, i)
	return &values
}

// Add appends the float32 'v' value at the end of the slice.
func (n *NumSlice) Add(v float32) {
	*n = append(*n, v)
}

// AddInt converts the integer 'v' to float32 and appends it at the end of the slice.
func (n *NumSlice) AddInt(v int) {
	converted := float32(v)
	*n = append(*n, converted)
}

// Get gets the float32 value at the 'i' index. Returns an error if the index 'i' is out of range.
func (n NumSlice) Get(i int) (float32, error) {
	if i >= 0 && i < len(n) {
		return n[i], nil
	}
	return 0, errors.Errorf("NumSlice.Get", "index: '%d' out of range", i)
}

// GetInt gets the value at the 'i' position rounded to the nearest integer
// (a half is added in the direction of the value's sign before the truncation).
// Returns an error and '0' if the index 'i' is out of range.
func (n NumSlice) GetInt(i int) (int, error) {
	const processName = "GetInt"
	if i < 0 || i >= len(n) {
		return 0, errors.Errorf(processName, "index: '%d' out of range", i)
	}
	v := n[i]
	rounded := v + Sign(v)*0.5
	return int(rounded), nil
}

// GetIntSlice gets the slice of integers from the provided 'NumSlice' values.
// In contrast to GetInt the values are truncated, not rounded.
func (n NumSlice) GetIntSlice() []int {
	ints := make([]int, len(n))
	for idx := 0; idx < len(n); idx++ {
		ints[idx] = int(n[idx])
	}
	return ints
}

+ 58
- 0
internal/jbig2/basic/stack.go View File

@ -0,0 +1,58 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package basic
// Stack is the LIFO (last in, first out) data structure implementation.
type Stack struct {
	// Data keeps the stack's values. The last element of the slice is the top of the stack.
	Data []interface{}
	// Aux is the auxiliary, additional stack used by some helpers.
	Aux *Stack
}

// Len returns the number of the elements stored on the stack.
func (s *Stack) Len() int {
	return len(s.Data)
}

// Peek returns the top element of the stack 's' without removing it.
// Returns false if the stack is empty.
func (s *Stack) Peek() (v interface{}, ok bool) {
	return s.peek()
}

// Pop removes the top element of the stack and returns it.
// Returns false if the stack is empty.
func (s *Stack) Pop() (v interface{}, ok bool) {
	if v, ok = s.peek(); ok {
		// cut the top element off the stack.
		s.Data = s.Data[:s.top()]
		return v, true
	}
	return nil, false
}

// Push puts the 'v' element on the top of the stack.
func (s *Stack) Push(v interface{}) {
	s.Data = append(s.Data, v)
}

// peek gets the last element of the Data or 'false' if there are no elements.
func (s *Stack) peek() (interface{}, bool) {
	if len(s.Data) == 0 {
		return nil, false
	}
	return s.Data[len(s.Data)-1], true
}

// top gets the index of the top element. It is equal to -1 for an empty stack.
func (s *Stack) top() int {
	return len(s.Data) - 1
}

+ 342
- 0
internal/jbig2/bitmap/bin-expand.go View File

@ -0,0 +1,342 @@
/*
* This file is subject to the terms and conditions defined in
* file 'LICENSE.md', which is part of this source code package.
*/
package bitmap
import (
"github.com/unidoc/unipdf/v3/internal/jbig2/errors"
)
// expandBinaryFactor2 fills the bitmap 'd' with the content of the bitmap 's' expanded twice
// in both directions. Every source byte is expanded into two bytes using the 'tabExpand2x' table,
// and each expanded line is then copied into the line directly below it.
// Returns a wrapped error if writing any of the bytes to 'd' fails.
func expandBinaryFactor2(d, s *Bitmap) (err error) {
	const processName = "expandBinaryFactor2"

	srcStride, dstStride := s.RowStride, d.RowStride
	for row := 0; row < s.Height; row++ {
		srcLine := row * srcStride
		dstLine := 2 * row * dstStride

		// expand every source byte of the row into (at most) two destination bytes.
		for col := 0; col < srcStride; col++ {
			doubled := tabExpand2x[s.Data[srcLine+col]]
			target := dstLine + col*2
			// the 'd' row stride might be smaller than doubled 's' row stride. In such a case
			// only the higher byte is stored, so that the next row is not affected.
			if d.RowStride != s.RowStride*2 && (col+1)*2 > d.RowStride {
				err = d.SetByte(target, byte(doubled>>8))
			} else {
				err = d.setTwoBytes(target, doubled)
			}
			if err != nil {
				return errors.Wrap(err, processName, "")
			}
		}

		// duplicate the expanded line into the following line.
		for col := 0; col < dstStride; col++ {
			from := dstLine + col
			to := from + dstStride
			if err = d.SetByte(to, d.Data[from]); err != nil {
				return errors.Wrapf(err, processName, "copy doubled line: '%d', Byte: '%d'", from, to)
			}
		}
	}
	return nil
}
// expandBinaryFactor4 fills the bitmap 'd' with the content of the bitmap 's' where every source byte
// is expanded into four bytes using the 'tabExpand4x' table, and each expanded line is copied
// into the three lines that follow it.
// Returns a wrapped error as soon as writing to 'd' fails.
// NOTE(review): 'd' is presumably expected to be four times wider and higher than 's' - confirm against the callers.
func expandBinaryFactor4(d, s *Bitmap) (err error) {
const processName = "expandBinaryFactor4"
bplS := s.RowStride
bplD := d.RowStride
// diff is the number of bytes by which the quadrupled source row stride exceeds the destination row stride.
diff := s.RowStride*4 - d.RowStride
var (
source, temp byte
expanded uint32
lineS, lineD, i, j, k, index, iindex int
)
for i = 0; i < s.Height; i++ {
// byte offsets of the current source line and of the first of its four destination lines.
lineS = i * bplS
lineD = 4 * i * bplD
// set four bytes per source byte in 'd' bitmap.
for j = 0; j < bplS; j++ {
source = s.Data[lineS+j]
expanded = tabExpand4x[source]
index = lineD + j*4
// the 'd' Bitmap might not have rowstride = 4*s.Rowstride
// i.e.:
// s.width: 18 -> rowstride = 3; d.Width = 72 -> 9 | 3 * 4 = 12 | 12 - 9 = 3
// s.width: 20 -> rowstride = 3; d.Width = 80 -> 10 | 3 * 4 = 12 | 12 - 10 = 2
// s.width: 46 -> rowstride = 6; d.Width = 184 -> 23 | 4 * 6 = 24 | 24 - 23 = 1
// thus setting four bytes might set bytes on the next row.
if diff != 0 && (j+1)*4 > d.RowStride {
// write 'diff' single bytes, starting from the byte shifted by 'diff*8' bits down to the one shifted by 8 bits.
for k = diff; k > 0; k-- {
temp = byte((expanded >> uint(k*8)) & 0xff)
iindex = index + (diff - k)
if err = d.SetByte(iindex, temp); err != nil {
return errors.Wrapf(err, processName, "Different rowstrides. K: %d", k)
}
}
} else if err = d.setFourBytes(index, expanded); err != nil {
return errors.Wrap(err, processName, "")
}
// NOTE(review): this call is unconditional and writes the same expanded value at the same
// offset as computed above ('index' == lineD+j*4), thus it also runs after the partial write branch.
// It looks redundant with the branches above - confirm the intent before changing the order or removing it.
if err = d.setFourBytes(lineD+j*4, tabExpand4x[s.Data[lineS+j]]); err != nil {
return errors.Wrap(err, processName, "")
}
}
// copy this quadrupled line into the next 3 lines too.
// Any bytes written past the end of the current line are overwritten here.
for k = 1; k < 4; k++ {
for j = 0; j < bplD; j++ {
if err = d.SetByte(lineD+k*bplD+j, d.Data[lineD+j]); err != nil {
return errors.Wrapf(err, processName, "copy 'quadrable' line: '%d', byte: '%d'", k, j)
}
}
}
}
return nil
}
// expandBinaryFactor8 fills the bitmap 'd' with the content of the bitmap 's' where every source byte
// is expanded into eight bytes using the 'tabExpand8x' table, and each expanded line is copied
// into the seven lines that follow it.
// Returns a wrapped error if writing any of the bytes to 'd' fails.
func expandBinaryFactor8(d, s *Bitmap) (err error) {
	const processName = "expandBinaryFactor8"

	srcStride, dstStride := s.RowStride, d.RowStride
	for row := 0; row < s.Height; row++ {
		srcLine := row * srcStride
		dstLine := 8 * row * dstStride

		// set eight bytes in 'd' for every source byte of the row.
		for col := 0; col < srcStride; col++ {
			widened := tabExpand8x[s.Data[srcLine+col]]
			if err = d.setEightBytes(dstLine+col*8, widened); err != nil {
				return errors.Wrap(err, processName, "")
			}
		}

		// replicate the expanded line into the next 7 lines.
		for copyNo := 1; copyNo < 8; copyNo++ {
			copyLine := dstLine + copyNo*dstStride
			for col := 0; col < dstStride; col++ {
				if err = d.SetByte(copyLine+col, d.Data[dstLine+col]); err != nil {
					return errors.Wrap(err, processName, "")
				}
			}
		}
	}
	return nil
}
// expandBinaryPower2 creates a new bitmap which is the source bitmap 's' expanded by the 'factor'
// in both directions. The 'factor' equal to 1 results in a copy of the source,
// the other allowed values are 2, 4 and 8.
// Returns an error if the source is nil, the factor is not supported or the expansion fails.
func expandBinaryPower2(s *Bitmap, factor int) (*Bitmap, error) {
	const processName = "expandBinaryPower2"
	if s == nil {
		return nil, errors.Error(processName, "source not defined")
	}

	// select the expansion routine for the given factor.
	var expand func(d, s *Bitmap) error
	switch factor {
	case 1:
		return copyBitmap(nil, s)
	case 2:
		expand = expandBinaryFactor2
	case 4:
		expand = expandBinaryFactor4
	case 8:
		expand = expandBinaryFactor8
	default:
		return nil, errors.Error(processName, "factor must be in {2,4,8} range")
	}

	// the destination is 'factor' times greater in both dimensions.
	d := New(factor*s.Width, factor*s.Height)
	if err := expand(d, s); err != nil {
		return nil, errors.Wrap(err, processName, "")
	}
	return d, nil
}
// expandBinaryPower2Low expands the source bitmap 's' by the 'factor' into the already
// created destination bitmap 'd'. The allowed 'factor' values are 2, 4 and 8.
// Returns an error if the factor is not supported or the expansion fails.
func expandBinaryPower2Low(d *Bitmap, s *Bitmap, factor int) (err error) {
	const processName = "expandBinaryPower2Low"

	// select the expansion routine for the given factor.
	var expand func(d, s *Bitmap) error
	switch factor {
	case 2:
		expand = expandBinaryFactor2
	case 4:
		expand = expandBinaryFactor4
	case 8:
		expand = expandBinaryFactor8
	default:
		return errors.Error(processName, "expansion factor not in {2,4,8} range")
	}

	if err = expand(d, s); err != nil {
		return errors.Wrap(err, processName, "")
	}
	return nil
}
// expandBinaryReplicate creates a new bitmap where every pixel of the source bitmap 's' is replicated
// 'xFact' times horizontally and 'yFact' times vertically.
// If both factors are equal to 1 the source is copied, and if both are equal to 2, 4 or 8
// the dedicated power of 2 expansion is used.
// Returns an error if the source is nil, any of the factors is not positive or writing to
// the destination bitmap fails.
func expandBinaryReplicate(s *Bitmap, xFact, yFact int) (*Bitmap, error) {
	const processName = "expandBinaryReplicate"
	if s == nil {
		return nil, errors.Error(processName, "source not defined")
	}
	if xFact <= 0 || yFact <= 0 {
		return nil, errors.Error(processName, "invalid scale factor: <= 0")
	}

	// equal factors might be handled by the specialized functions.
	if xFact == yFact {
		switch xFact {
		case 1:
			bm, err := copyBitmap(nil, s)
			if err != nil {
				return nil, errors.Wrap(err, processName, "xFact == yFact")
			}
			return bm, nil
		case 2, 4, 8:
			bm, err := expandBinaryPower2(s, xFact)
			if err != nil {
				return nil, errors.Wrap(err, processName, "xFact in {2,4,8}")
			}
			return bm, nil
		}
	}

	d := New(xFact*s.Width, yFact*s.Height)
	stride := d.RowStride
	for row := 0; row < s.Height; row++ {
		// byte offset of the first destination line for this source row.
		firstLine := yFact * row * stride

		// replicate every set pixel 'xFact' times on the first destination line.
		for col := 0; col < s.Width; col++ {
			if !s.GetPixel(col, row) {
				continue
			}
			firstBit := firstLine*8 + xFact*col
			for n := 0; n < xFact; n++ {
				d.setBit(firstBit + n)
			}
		}

		// copy the first destination line byte by byte into the remaining 'yFact' - 1 lines.
		for rep := 1; rep < yFact; rep++ {
			copyLine := firstLine + rep*stride
			for b := 0; b < stride; b++ {
				value, err := d.GetByte(firstLine + b)
				if err != nil {
					return nil, errors.Wrapf(err, processName, "replicating line: '%d'", rep)
				}
				if err = d.SetByte(copyLine+b, value); err != nil {
					return nil, errors.Wrap(err, processName, "Setting byte failed")
				}
			}
		}
	}
	return d, nil
}
// makeExpandTab2x creates the lookup table used for the 2x binary expansion.
// The entry at index 'i' is the 16 bit value in which every bit of the byte 'i'
// is doubled: the bit 'n' of the index sets the bits '2n' and '2n+1' of the entry.
func makeExpandTab2x() (tab [256]uint16) {
	for value := range tab {
		for bit := uint(0); bit < 8; bit++ {
			if value&(1<<bit) == 0 {
				continue
			}
			// the pair of the bits shifted to the doubled position.
			tab[value] |= uint16(0x3) << (2 * bit)
		}
	}
	return tab
}
// makeExpandTab4x creates the lookup table used for the 4x binary expansion.
// The entry at index 'i' is the 32 bit value in which every bit of the byte 'i'
// is repeated four times: the bit 'n' of the index sets the bits from '4n' to '4n+3' of the entry.
func makeExpandTab4x() (tab [256]uint32) {
	for value := range tab {
		for bit := uint(0); bit < 8; bit++ {
			if value&(1<<bit) == 0 {
				continue
			}
			// the nibble shifted to the quadrupled position.
			tab[value] |= uint32(0xf) << (4 * bit)
		}
	}
	return tab
}
func makeExpandTab8x() (tab [256]uint64) {
for i := 0; i < 256; i++ {
if i&0x01 != 0 {
tab[i] |= 0xff
}
if i&0x02 != 0 {
tab[i] |= 0xff00
}
if i&0x04 != 0 {
tab[i] |= 0xff0000
}
if i&0x08 != 0 {
tab[i] |= 0xff000000
}
if i&0x10 != 0 {
tab[i] |= 0xff00000000
}
if i&0x20 != 0 {
tab[i] |= 0xff0000000000