Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save zgordan-vv/487d6d5143df61d120d71914cccbbfd0 to your computer and use it in GitHub Desktop.
Save zgordan-vv/487d6d5143df61d120d71914cccbbfd0 to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"strconv"
"github.com/unidoc/unioffice/document"
"github.com/unidoc/unioffice/schema/soo/wml"
"github.com/unidoc/unioffice/schema/soo/ofc/sharedTypes"
)
// KeyValuePair holds the content extracted from one table row: the paragraphs
// of the row's first cell as Key and those of its second cell as Value.
type KeyValuePair struct {
// Key is the paragraph content of the first cell in the row.
Key []Paragraph
// Value is the paragraph content of the second cell in the row.
Value []Paragraph
}
// Paragraph is the extracted form of a single paragraph found inside a table
// cell.
type Paragraph struct {
// IsListItem is true when the source paragraph's properties carry
// numbering properties (NumPr is non-nil).
IsListItem bool
// Content holds one Span per run found in the paragraph.
Content []Span
// Styles is the formatting produced by pPr2Styles from the paragraph's
// properties.
Styles StylesT
}
// Span is the extracted form of a single run: its text plus its formatting.
type Span struct {
// Text is the concatenation of the text pieces found in the run.
Text string
// Styles is the formatting produced by rPr2Styles from the run's
// properties.
Styles StylesT
}
// StylesT is a flattened view of the run-level formatting properties that
// pPr2Styles and rPr2Styles read from the document. String fields are left
// empty, and typed fields keep their zero value, when the corresponding
// property is absent.
type StylesT struct {
// RStyle is the value of the referenced run style (RStyle.ValAttr).
RStyle string
// Font is the first font name available among the ASCII, HAnsi and
// complex-script attributes, checked in that order.
Font string
// EastAsiaFont is the font name from the East Asian font attribute.
EastAsiaFont string
// HexColor is the RGB hex value of the text color, when one is set.
HexColor string
// Spacing is the spacing value rendered as a string: the integer form
// and/or the universal-measure form, whichever is present.
Spacing string
// FontSize is the Sz value rendered as a string: the unsigned number
// and/or the universal-measure form, whichever is present.
FontSize string
// FontSizeComplex is the SzCs value, rendered the same way as FontSize.
FontSizeComplex string
// UnderlineType is the underline's ValAttr.
UnderlineType wml.ST_Underline
// UnderlineColor is the RGB hex value of the underline color, when set.
UnderlineColor string
// Bold is true when getBool reports either the B or the BCs toggle.
Bold bool
// Italic is true when getBool reports either the I or the ICs toggle.
Italic bool
// Caps, Strike, DoubleStrike, Outline, Shadow, Emboss and RightToLeft each
// mirror getBool applied to the same-purpose toggle (Caps, Strike, Dstrike,
// Outline, Shadow, Emboss, Rtl).
Caps bool
Strike bool
DoubleStrike bool
Outline bool
Shadow bool
Emboss bool
RightToLeft bool
// VerticalAlign is the VertAlign element's ValAttr.
VerticalAlign sharedTypes.ST_VerticalAlignRun
}
// main opens "tables.docx", extracts the key/value pairs from its tables and
// prints them to standard output. It panics if the document cannot be opened.
func main() {
	doc, openErr := document.Open("tables.docx")
	if openErr != nil {
		panic(openErr)
	}
	fmt.Println(ExtractFromDocTables(doc))
}
// ExtractFromDocTables walks every row of every table returned by
// doc.Tables() and produces one KeyValuePair per row, taking the first cell as
// the key and the second cell as the value.
//
// Rows with fewer than two cells cannot form a pair and are skipped instead of
// causing an index-out-of-range panic. Cells beyond the second are ignored.
// The returned slice is never nil.
func ExtractFromDocTables(doc *document.Document) []KeyValuePair {
	result := []KeyValuePair{}
	for _, tbl := range doc.Tables() {
		for _, crc := range tbl.X().EG_ContentRowContent {
			for _, tr := range crc.Tr {
				cells := tr.EG_ContentCellContent
				if len(cells) < 2 {
					// Not enough cells for a key and a value.
					continue
				}
				result = append(result, KeyValuePair{
					Key:   extractFromCell(cells[0]),
					Value: extractFromCell(cells[1]),
				})
			}
		}
	}
	return result
}
// extractFromCell converts the paragraphs of the first table cell held by ccc
// into Paragraph values.
//
// It returns an empty, non-nil slice when ccc is nil or holds no cell.
// Paragraphs without paragraph properties are kept: they get zero-valued
// Styles and IsListItem == false rather than triggering a nil dereference.
func extractFromCell(ccc *wml.EG_ContentCellContent) []Paragraph {
	paragraphs := []Paragraph{}
	if ccc == nil || len(ccc.Tc) == 0 || ccc.Tc[0] == nil {
		return paragraphs
	}
	for _, ble := range ccc.Tc[0].EG_BlockLevelElts {
		for _, cbc := range ble.EG_ContentBlockContent {
			for _, sourcePar := range cbc.P {
				paragraph := Paragraph{
					Content: getContent(sourcePar.EG_PContent),
				}
				// PPr is optional on a paragraph; only read it when present.
				if ppr := sourcePar.PPr; ppr != nil {
					paragraph.Styles = pPr2Styles(ppr)
					paragraph.IsListItem = ppr.NumPr != nil
				}
				paragraphs = append(paragraphs, paragraph)
			}
		}
	}
	return paragraphs
}
// getContent turns the runs found in pcs into Spans, one Span per run, in
// document order. The returned slice is never nil.
//
// Run-content entries that carry no run (R is nil) are skipped. A run without
// run properties gets zero-valued Styles, and inner-content entries that are
// not text (T is nil) contribute nothing to the Span's Text; previously each
// of these cases caused a nil-pointer panic.
func getContent(pcs []*wml.EG_PContent) []Span {
	content := []Span{}
	for _, pc := range pcs {
		for _, crc := range pc.EG_ContentRunContent {
			r := crc.R
			if r == nil {
				continue
			}
			var rStyles StylesT
			if r.RPr != nil {
				rStyles = rPr2Styles(r.RPr)
			}
			text := ""
			for _, ic := range r.EG_RunInnerContent {
				if ic.T != nil {
					text += ic.T.Content
				}
			}
			content = append(content, Span{
				Text:   text,
				Styles: rStyles,
			})
		}
	}
	return content
}
// pPr2Styles flattens the run-level formatting stored on a paragraph's
// properties (ppr.RPr) into a StylesT.
//
// It returns the zero StylesT when ppr is nil or carries no run properties,
// instead of dereferencing a nil pointer. Properties that are absent leave the
// corresponding StylesT field at its zero value.
func pPr2Styles(ppr *wml.CT_PPr) StylesT {
	if ppr == nil || ppr.RPr == nil {
		return StylesT{}
	}
	pr := ppr.RPr
	styles := StylesT{
		Bold:         getBool(pr.B) || getBool(pr.BCs),
		Italic:       getBool(pr.I) || getBool(pr.ICs),
		Caps:         getBool(pr.Caps),
		Strike:       getBool(pr.Strike),
		DoubleStrike: getBool(pr.Dstrike),
		Outline:      getBool(pr.Outline),
		Shadow:       getBool(pr.Shadow),
		Emboss:       getBool(pr.Emboss),
		RightToLeft:  getBool(pr.Rtl),
	}
	if pr.RStyle != nil {
		styles.RStyle = pr.RStyle.ValAttr
	}
	if fonts := pr.RFonts; fonts != nil {
		// Prefer the ASCII font, then HAnsi, then the complex-script font.
		switch {
		case fonts.AsciiAttr != nil:
			styles.Font = *fonts.AsciiAttr
		case fonts.HAnsiAttr != nil:
			styles.Font = *fonts.HAnsiAttr
		case fonts.CsAttr != nil:
			styles.Font = *fonts.CsAttr
		}
		if fonts.EastAsiaAttr != nil {
			styles.EastAsiaFont = *fonts.EastAsiaAttr
		}
	}
	if color := pr.Color; color != nil {
		if rgb := color.ValAttr.ST_HexColorRGB; rgb != nil {
			styles.HexColor = *rgb
		}
	}
	if spacing := pr.Spacing; spacing != nil {
		// The value is a union: render whichever form(s) are populated.
		spacingResult := ""
		valAttr := spacing.ValAttr
		if valAttr.Int64 != nil {
			spacingResult += strconv.FormatInt(*valAttr.Int64, 10)
		}
		if valAttr.ST_UniversalMeasure != nil {
			spacingResult += *valAttr.ST_UniversalMeasure
		}
		styles.Spacing = spacingResult
	}
	if sz := pr.Sz; sz != nil {
		size := ""
		valAttr := sz.ValAttr
		if valAttr.ST_UnsignedDecimalNumber != nil {
			size += strconv.FormatUint(*valAttr.ST_UnsignedDecimalNumber, 10)
		}
		if valAttr.ST_PositiveUniversalMeasure != nil {
			size += *valAttr.ST_PositiveUniversalMeasure
		}
		styles.FontSize = size
	}
	if sz := pr.SzCs; sz != nil {
		size := ""
		valAttr := sz.ValAttr
		if valAttr.ST_UnsignedDecimalNumber != nil {
			size += strconv.FormatUint(*valAttr.ST_UnsignedDecimalNumber, 10)
		}
		if valAttr.ST_PositiveUniversalMeasure != nil {
			size += *valAttr.ST_PositiveUniversalMeasure
		}
		styles.FontSizeComplex = size
	}
	if underline := pr.U; underline != nil {
		styles.UnderlineType = underline.ValAttr
		// The underline color attribute is optional; guard before reading it.
		if color := underline.ColorAttr; color != nil && color.ST_HexColorRGB != nil {
			styles.UnderlineColor = *color.ST_HexColorRGB
		}
	}
	if vertAlign := pr.VertAlign; vertAlign != nil {
		styles.VerticalAlign = vertAlign.ValAttr
	}
	return styles
}
// rPr2Styles flattens a run's properties into a StylesT.
//
// It returns the zero StylesT when pr is nil (a run with no formatting),
// instead of dereferencing a nil pointer. Properties that are absent leave the
// corresponding StylesT field at its zero value.
func rPr2Styles(pr *wml.CT_RPr) StylesT {
	if pr == nil {
		return StylesT{}
	}
	styles := StylesT{
		Bold:         getBool(pr.B) || getBool(pr.BCs),
		Italic:       getBool(pr.I) || getBool(pr.ICs),
		Caps:         getBool(pr.Caps),
		Strike:       getBool(pr.Strike),
		DoubleStrike: getBool(pr.Dstrike),
		Outline:      getBool(pr.Outline),
		Shadow:       getBool(pr.Shadow),
		Emboss:       getBool(pr.Emboss),
		RightToLeft:  getBool(pr.Rtl),
	}
	if pr.RStyle != nil {
		styles.RStyle = pr.RStyle.ValAttr
	}
	if fonts := pr.RFonts; fonts != nil {
		// Prefer the ASCII font, then HAnsi, then the complex-script font.
		switch {
		case fonts.AsciiAttr != nil:
			styles.Font = *fonts.AsciiAttr
		case fonts.HAnsiAttr != nil:
			styles.Font = *fonts.HAnsiAttr
		case fonts.CsAttr != nil:
			styles.Font = *fonts.CsAttr
		}
		if fonts.EastAsiaAttr != nil {
			styles.EastAsiaFont = *fonts.EastAsiaAttr
		}
	}
	if color := pr.Color; color != nil {
		if rgb := color.ValAttr.ST_HexColorRGB; rgb != nil {
			styles.HexColor = *rgb
		}
	}
	if spacing := pr.Spacing; spacing != nil {
		// The value is a union: render whichever form(s) are populated.
		spacingResult := ""
		valAttr := spacing.ValAttr
		if valAttr.Int64 != nil {
			spacingResult += strconv.FormatInt(*valAttr.Int64, 10)
		}
		if valAttr.ST_UniversalMeasure != nil {
			spacingResult += *valAttr.ST_UniversalMeasure
		}
		styles.Spacing = spacingResult
	}
	if sz := pr.Sz; sz != nil {
		size := ""
		valAttr := sz.ValAttr
		if valAttr.ST_UnsignedDecimalNumber != nil {
			size += strconv.FormatUint(*valAttr.ST_UnsignedDecimalNumber, 10)
		}
		if valAttr.ST_PositiveUniversalMeasure != nil {
			size += *valAttr.ST_PositiveUniversalMeasure
		}
		styles.FontSize = size
	}
	if sz := pr.SzCs; sz != nil {
		size := ""
		valAttr := sz.ValAttr
		if valAttr.ST_UnsignedDecimalNumber != nil {
			size += strconv.FormatUint(*valAttr.ST_UnsignedDecimalNumber, 10)
		}
		if valAttr.ST_PositiveUniversalMeasure != nil {
			size += *valAttr.ST_PositiveUniversalMeasure
		}
		styles.FontSizeComplex = size
	}
	if underline := pr.U; underline != nil {
		styles.UnderlineType = underline.ValAttr
		// The underline color attribute is optional; guard before reading it.
		if color := underline.ColorAttr; color != nil && color.ST_HexColorRGB != nil {
			styles.UnderlineColor = *color.ST_HexColorRGB
		}
	}
	if vertAlign := pr.VertAlign; vertAlign != nil {
		styles.VerticalAlign = vertAlign.ValAttr
	}
	return styles
}
// getBool reports whether a toggle property is switched on.
//
// A missing element (nil) is off. An element with no value, or with no
// explicit boolean in its value, is on. An element whose value carries an
// explicit boolean yields that boolean, so a property written with a false
// value is no longer reported as enabled merely because the element exists.
func getBool(onOff *wml.CT_OnOff) bool {
	if onOff == nil {
		return false
	}
	if v := onOff.ValAttr; v != nil && v.Bool != nil {
		return *v.Bool
	}
	return true
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment