diff --git a/.gitignore b/.gitignore
index ec2481e..d10a5fc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,21 +1,2 @@
-# ---> Vim
-# Swap
-[._]*.s[a-v][a-z]
-!*.svg # comment out if you don't need vector files
-[._]*.sw[a-p]
-[._]s[a-rt-v][a-z]
-[._]ss[a-gi-z]
-[._]sw[a-p]
-
-# Session
-Session.vim
-Sessionx.vim
-
-# Temporary
-.netrwhist
-*~
-# Auto-generated tag files
-tags
-# Persistent undo
-[._]*.un~
-
+*.swp
+*.swo
\ No newline at end of file
diff --git a/TSDocxToPdf.tsf b/TSDocxToPdf.tsf
new file mode 100644
index 0000000..5b6e686
--- /dev/null
+++ b/TSDocxToPdf.tsf
@@ -0,0 +1,472 @@
+// TSDocxToPdf: converts a DOCX document into a PDF file. Paragraphs are laid
+// out word by word on pages sized from the document's section properties;
+// drawings and tables are not implemented yet.
+type TSDocxToPdf = class
+uses TSColorToolKit, TSPdfEnumerations;
+
+public
+    function Create(alias, file);
+    function SaveToFile(alias, file);
+    function Transform();
+
+    property Font read ReadFont;
+    function ReadFont();
+
+private
+    function Init();
+    function InitSectPr();
+    function InitEncoder();
+    function InitPoint();
+    function InitStyles();
+
+    function TransformParagraph(paragraph);
+    function TransformDrawing(drawing);
+    function TransformTable(table);
+
+    function AddPage();
+    function CheckAndAddPage(offset);
+    function ResetCoordinates();
+    function CalcPagragraphPt(size, line);
+    function FloatN(R, N);
+    function ParagraphWordsToLine(ware);
+    function GetElementType(element);
+    function SetPageItalic(page, x, y); // simulated italic [deprecated]
+
+    // for testing only
+    function PrintGrid();
+
+private
+    docx_components_; // Components@DOCX
+    styles_adapter_; // StylesAdapter@Docx
+
+    pdf_; // PdfFile
+    point_; // Point
+    sect_;
+    base_size_; // baseline font size
+    font_ware_;
+    current_page_; // current page
+end;
+
+type Point = class // tracks the current position
+public
+    X;
+    Y;
+end;
+
+// Opens the DOCX identified by (alias, file) and prepares the PDF state.
+// Raises when OpenFile reports an error.
+function TSDocxToPdf.Create(alias, file);
+begin
+    NameSpace "DOCX";
+    docx_components_ := new Components();
+    [err, msg] := docx_components_.OpenFile(alias, file, nil);
+    if err then raise "Create object 'TSDocxFile' failed.";
+    self.Init();
+end;
+
+// Builds the PDF document, styles, section metrics and the first page.
+// Order matters: AddPage reads sect_ and point_, which the earlier calls set.
+function TSDocxToPdf.Init();
+begin
+    pdf_ := new PdfFile();
+    self.InitStyles();
+    self.InitEncoder();
+    self.InitSectPr();
+    self.InitPoint();
+    self.AddPage();
+    base_size_ := integer(sect_.DocGrid.LinePitch / 1.38, 2);
+    font_ware_ := new TSFontWare(pdf_);
+end;
+
+// Enables the PDF library's CNS fonts and encodings.
+function TSDocxToPdf.InitEncoder();
+begin
+    pdf_.UseCNSFonts();
+    pdf_.UseCNSEncodings();
+    // pdf_.UseUTFEncodings();
+end;
+
+// Deserializes the document and caches its section properties in sect_.
+function TSDocxToPdf.InitSectPr();
+begin
+    document := docx_components_.Document;
+    document.Deserialize();
+    sect_ := document.Body.SectPr;
+    // wrap in the decorator, which performs the conversion
+    sect_ := new SectPrUnitDecorator(sect_);
+    // Keep an explicit orientation; a missing one falls back to "portrait",
+    // the OOXML default for the page-size orientation attribute.
+    sect_.PgSz.Orient := sect_.PgSz.Orient ? sect_.PgSz.Orient : "portrait";
+
+    // println("LinePitch = {}, Type = {}", sect_.DocGrid.LinePitch, sect_.DocGrid.Type);
+    // println("Width = {}, Height = {}", sect_.PgSz.W, sect_.PgSz.H);
+    // println("Top = {}, Right = {}, Bottom = {}, Left = {}, Header = {}, Footer = {}\n",
+    // sect_.PgMar.Top, sect_.PgMar.Right, sect_.PgMar.Bottom, sect_.PgMar.Left, sect_.PgMar.Header, sect_.PgMar.Footer);
+end;
+
+// Creates the position tracker and places it at the start of the text area.
+function TSDocxToPdf.InitPoint();
+begin
+    // the starting y position should be max(top, header) + line spacing
+    point_ := new Point();
+    self.ResetCoordinates();
+end;
+
+// Deserializes the document styles and wraps them in a StylesAdapter.
+function TSDocxToPdf.InitStyles();
+begin
+    styles := docx_components_.Styles;
+    styles.Deserialize();
+    styles_adapter_ := new StylesAdapter(styles);
+end;
+
+// Appends a page sized from the section properties, makes it current and
+// draws the debug grid on it.
+function TSDocxToPdf.AddPage();
+begin
+    current_page_ := pdf_.AddPage();
+    current_page_.SetWidth(sect_.PgSz.W);
+    current_page_.SetHeight(sect_.PgSz.H);
+    self.PrintGrid();
+end;
+
+// Moves the position to the left margin, just below the top margin.
+function TSDocxToPdf.ResetCoordinates();
+begin
+    point_.X := sect_.PgMar.Left;
+    point_.Y := sect_.PgSz.H - sect_.PgMar.Top;
+end;
+
+/// Saves the PDF to (alias, file). Returns: err
+function TSDocxToPdf.SaveToFile(alias, file);
+begin
+    return pdf_.SaveToFile(alias, file);
+end;
+
+// Debug aid: rules one grey line per grid pitch from point_.Y down to the
+// bottom margin, then outlines the text area.
+function TSDocxToPdf.PrintGrid(); // for testing only
+begin
+    page := current_page_;
+
+    i := 0;
+    while true do
+    begin
+        y := point_.Y - i * sect_.DocGrid.LinePitch;
+        if y <= sect_.PgMar.Bottom then break;
+        page.SetLineWidth(0.05);
+        page.SetGrayStroke(0.75);
+        page.MoveTo(sect_.PgMar.Left, y);
+        page.LineTo(sect_.PgSz.W - sect_.PgMar.Right, y);
+        page.Stroke();
+        i++;
+    end
+
+    x1 := sect_.PgMar.Left;
+    y1 := sect_.PgSz.H - sect_.PgMar.Top;
+    x2 := sect_.PgSz.W - sect_.PgMar.Right;
+    y2 := y1;
+    x3 := x1;
+    y3 := sect_.PgMar.Bottom;
+    x4 := x2;
+    y4 := y3;
+    page.SetLineWidth(0.05);
+    page.SetGrayStroke(0.5);
+    page.MoveTo(x1, y1);
+    page.LineTo(x2, y2);
+    page.Stroke();
+    page.MoveTo(x1, y1);
+    page.LineTo(x3, y3);
+    page.Stroke();
+    page.MoveTo(x2, y2);
+    page.LineTo(x4, y4);
+    page.Stroke();
+    page.MoveTo(x3, y3);
+    page.LineTo(x4, y4);
+    page.Stroke();
+end;
+
+// Maps a body element to a dispatch code: 1 = paragraph ('p'), 3 = table
+// ('tbl'), 0 = anything else. Code 2 (drawing) is never produced here.
+function TSDocxToPdf.GetElementType(element);
+begin
+    if element.LocalName = 'p' then return 1;
+    if element.LocalName = 'tbl' then return 3;
+    return 0;
+end;
+
+// Walks the body elements in order and renders each one by its type code.
+function TSDocxToPdf.Transform();
+begin
+    elements := docx_components_.Document.Body.Elements();
+    for i:=0 to length(elements)-1 do
+    // for i:=0 to 2 do
+    begin
+        println("i = {}", i);
+        case self.GetElementType(elements[i]) of
+            1: self.TransformParagraph(elements[i]); // plain paragraph
+            2: self.TransformDrawing(elements[i]); // picture paragraph
+            3: self.TransformTable(elements[i]); // table
+        end;
+    end
+end;
+
+// Starts a new page when point_.Y - offset (offset defaults to 0) has
+// reached the bottom margin. Returns true if a page was added.
+function TSDocxToPdf.CheckAndAddPage(offset);
+begin
+    offset := ifnil(offset) ? 0 : offset;
+    if point_.Y - offset <= sect_.PgMar.Bottom then
+    begin
+        // Reset first: AddPage draws the debug grid starting at point_.Y,
+        // so the position must already be at the top of the new page.
+        // AddPage draws the grid itself; no second PrintGrid call is needed.
+        self.ResetCoordinates();
+        self.AddPage();
+        return true;
+    end
+    return false;
+end;
+
+// Lays out every word of the paragraph and returns one record per word
+// with the keys "page", "font", "word", "rpr", "x" and "y". Moves point_.Y
+// down by one line height per finished line and adds pages as needed.
+function TSDocxToPdf.ParagraphWordsToLine(ware);
+begin
+    self.CheckAndAddPage();
+    page := current_page_;
+    lines := array();
+    x := point_.X + ware.Paragraph.PPr.Ind.FirstLine;
+    y := point_.Y;
+    max_size := 0;
+
+    i := 0;
+    begin_index := 0; // index of the first word on the current line
+    words := ware.GetWords();
+    while i <= length(words)-1 do
+    begin
+        [word, rpr] := words[i];
+        if rpr.Sz.Val > max_size then
+            max_size := rpr.Sz.Val;
+
+        font_obj := font_ware_.GetFont(rpr.RFonts.EastAsia, rpr.B, rpr.I);
+        lines[i]["page"] := page;
+        lines[i]["font"] := font_obj;
+        lines[i]["word"] := word;
+        lines[i]["rpr"] := rpr;
+        lines[i]["x"] := x;
+        page.SetFontAndSize(font_obj, rpr.Sz.Val);
+        w := page.TextWidth(word);
+        // println("word = {}, x = {}, w = {}, sz = {}", word, x, w, rpr.Sz.Val);
+        x += w;
+
+        // Extra gap between a length-1 ASCII digit and an adjacent longer word.
+        if ware.Paragraph.PPr.AutoSpaceDN and i < length(words)-1 then
+        begin
+            current_len := length(word);
+            next_len := length(words[i+1][0]);
+            if (current_len = 1 and next_len >= 2) or (current_len >= 2 and next_len = 1) then
+            begin
+                cord := current_len = 1 ? ord(word) : ord(words[i+1][0]);
+                if cord >= 48 and cord <= 57 then
+                    x += rpr.Sz.Val * 0.27;
+            end
+        end
+
+        // Same gap between a length-1 ASCII letter and an adjacent longer word.
+        if ware.Paragraph.PPr.AutoSpaceDE and i < length(words)-1 then
+        begin
+            current_len := length(word);
+            next_len := length(words[i+1][0]);
+            if (current_len = 1 and next_len >= 2) or (current_len >= 2 and next_len = 1) then
+            begin
+                cord := current_len = 1 ? ord(word) : ord(words[i+1][0]);
+                if (cord >= 97 and cord <= 122) or (cord >= 65 and cord <= 90) then
+                    x += rpr.Sz.Val * 0.27;
+            end
+        end
+
+        line_pt := self.CalcPagragraphPt(max_size, ware.Paragraph.PPr.Spacing.Line);
+        offset := (line_pt - max_size) / 2;
+        if self.CheckAndAddPage(line_pt) then // page break: restart the current line on the new page
+        begin
+            page := current_page_;
+            x := point_.X;
+            y := point_.Y;
+            i := begin_index;
+            max_size := 0;
+            continue;
+        end
+        // Line break. The i > begin_index guard lets a word that is wider
+        // than the whole line stay on its line instead of wrapping forever.
+        if (x >= sect_.PgSz.W - sect_.PgMar.Right) and (i > begin_index) then
+        begin
+            y := y - offset - max_size + max_size / 5;
+            for j:=begin_index to i-1 do
+                lines[j]["y"] := y;
+
+            // reset the per-line state; word i is measured again on the new line
+            begin_index := i;
+            max_size := 0;
+            x := point_.X;
+            y := point_.Y - line_pt;
+            point_.Y := y;
+
+            // page break
+            if self.CheckAndAddPage() then
+            begin
+                page := current_page_;
+                x := point_.X;
+                y := point_.Y;
+            end
+        end
+        else begin
+            i++;
+            if i > length(words)-1 then // reached the end without wrapping
+            begin
+                y := y - offset - max_size + max_size / 5;
+                for j:=begin_index to length(words)-1 do
+                    lines[j]["y"] := y;
+                point_.Y -= line_pt;
+            end
+        end
+    end
+    return lines;
+end;
+
+// Applies a shear text matrix at (x, y) to fake an italic face. [deprecated]
+// NOTE(review): angle / 180 / Pi() is not the usual degrees-to-radians
+// conversion (angle * Pi() / 180); it is left unchanged because the shear
+// produced by the current formula may be the intended one - please confirm.
+function TSDocxToPdf.SetPageItalic(page, x, y);
+begin
+    angle := 130;
+    rad := angle / 180 / Pi();
+    page.SetTextMatrix(1, 0, tan(rad), 1, x, y);
+end;
+
+// Renders one paragraph: merges its styles, lays the words out into lines
+// and draws each word with its colour, font and size.
+function TSDocxToPdf.TransformParagraph(paragraph);
+begin
+    // 1. font size
+    // 2. font colour
+    // 3. italic
+    // 4. bold
+    // 5. first-line indent
+    // 6. character spacing
+
+    // 7. underline
+    // 8. strikethrough
+    // 9. superscript / subscript
+    // 10. alignment
+    paragraph_ware := new TSParagraphWare(docx_components_, styles_adapter_, paragraph);
+    paragraph_ware.Do();
+
+    // Simulated bold/italic is switched off: bold and italic are already
+    // passed to the font lookup in ParagraphWordsToLine. The flags are set
+    // explicitly so the branch below never reads an unassigned variable.
+    draw_bold := false;
+    draw_italic := false;
+
+    // turn every character of the paragraph ware into a positioned record
+    lines := self.ParagraphWordsToLine(paragraph_ware);
+    // start writing to the pdf
+    for i:=0 to length(lines)-1 do
+    begin
+        line := lines[i];
+        word := line["word"];
+        rpr := line["rpr"];
+        font_obj := line["font"];
+        [r, g, b] := array(0, 0, 0);
+        // A colour of "auto" is not a hex triplet; keep the default black.
+        color_val := rpr.Color.Val;
+        if ifString(color_val) then
+        begin
+            if color_val = "auto" then color_val := nil;
+        end
+        if color_val then [r, g, b] := TSColorToolKit.HexToRGB(color_val);
+        page := line["page"];
+        page.SetRGBFill(r / 255, g / 255, b / 255);
+        page.SetFontAndSize(font_obj, rpr.Sz.Val);
+
+        x := line["x"];
+        y := line["y"];
+        if draw_bold then
+        begin
+            // 1. redraw at shifted offsets -- the result with the current parameters is poor
+            // z := 0.0005 * sqrt(x*x + y*y) * sqrt(2) / 2;
+            // left_x := x - z; // lower-left x
+            // left_y := y - z; // lower-left y
+            // right_x := x + z; // upper-right x
+            // right_y := y + z; // upper-right y
+            // offset := word.WordProperty.Size / 1000; // shift by one thousandth each time
+            // println("z = {}", z);
+            // println("x = {}, y = {}, offset = {}", x, y, offset);
+            // println("left_x = {}, left_y = {}, right_x = {}, left_y = {}\n", left_x, left_y, right_x, right_x);
+            // while left_x <= right_x and left_y <= right_y do
+            // begin
+            //     if draw_italic then self.SetPageItalic(page, left_x, left_y);
+            //     page.TextOut(left_x, left_y, word.Word);
+            //     left_x += offset;
+            //     left_y += right_y;
+            // end
+
+            // 2. redraw at the same x, y with slightly varying font sizes
+            multi := 1.030;
+            size := rpr.Sz.Val;
+            target_size := size * multi;
+            offset := size * 0.01;
+            size /= multi;
+            while size <= target_size do
+            begin
+                page.SetFontAndSize(font_obj, size);
+                page.beginText();
+                if draw_italic then self.SetPageItalic(page, x, y);
+                page.TextOut(x, y, word);
+                page.endText();
+                size += offset;
+            end
+        end
+        else begin
+            page.beginText();
+            page.TextOut(x, y, word);
+            page.endText();
+        end
+
+        page.SetRGBFill(0, 0, 0);
+    end
+end;
+
+// Placeholder: drawings are not rendered yet.
+function TSDocxToPdf.TransformDrawing(drawing);
+begin
+end;
+
+// Placeholder: tables are not rendered yet.
+function TSDocxToPdf.TransformTable(table);
+begin
+end;
+
+// Returns the line height for text of the given size: the grid line pitch
+// times the number of base_size_ units needed to fit `size`.
+// NOTE(review): `line` (the paragraph's Spacing.Line) is accepted but does
+// not influence the result yet - confirm whether it should scale it.
+function TSDocxToPdf.CalcPagragraphPt(size, line);
+begin
+    multi := Ceil(size / base_size_);
+    return sect_.DocGrid.LinePitch * multi;
+end;
+
+// Rounds r to n decimal places.
+function TSDocxToPdf.FloatN(r, n);
+begin
+    return Round(r * IntPower(10, n)) / IntPower(10, n);
+end;
+
+// Getter behind the Font property: returns font_ware_.
+function TSDocxToPdf.ReadFont();
+begin
+    return font_ware_;
+end;
+
diff --git a/fonts/仿宋.ttf b/fonts/仿宋.ttf
new file mode 100644
index 0000000..68334e2
Binary files /dev/null and b/fonts/仿宋.ttf differ
diff --git a/fonts/华文楷体.ttf b/fonts/华文楷体.ttf
new file mode 100644
index 0000000..5044116
Binary files /dev/null and b/fonts/华文楷体.ttf differ
diff --git a/fonts/宋体.ttc b/fonts/宋体.ttc
new file mode 100644
index 0000000..6ca8de3
Binary files /dev/null and b/fonts/宋体.ttc differ
diff --git a/fonts/微软雅黑-bold.ttc b/fonts/微软雅黑-bold.ttc
new file mode 100644
index 0000000..351b10f
Binary files /dev/null and b/fonts/微软雅黑-bold.ttc differ
diff --git a/fonts/微软雅黑-light.ttc b/fonts/微软雅黑-light.ttc
new file mode 100644
index 0000000..c185092
Binary files /dev/null and b/fonts/微软雅黑-light.ttc differ
diff --git a/fonts/微软雅黑.ttc b/fonts/微软雅黑.ttc
new file mode 100644
index 0000000..ddc87b9
Binary files /dev/null and b/fonts/微软雅黑.ttc differ
diff --git a/fonts/等线-bold.ttf b/fonts/等线-bold.ttf
new file mode 100644
index 0000000..882c20f
Binary files /dev/null
and b/fonts/等线-bold.ttf differ
diff --git a/fonts/等线-light.ttf b/fonts/等线-light.ttf
new file mode 100644
index 0000000..d8434d8
Binary files /dev/null and b/fonts/等线-light.ttf differ
diff --git a/fonts/等线.ttf b/fonts/等线.ttf
new file mode 100644
index 0000000..0a727fd
Binary files /dev/null and b/fonts/等线.ttf differ
diff --git a/fonts/黑体.ttf b/fonts/黑体.ttf
new file mode 100644
index 0000000..5bd4687
Binary files /dev/null and b/fonts/黑体.ttf differ
diff --git a/test/test.tsl b/test/test.tsl
new file mode 100644
index 0000000..9a6f78b
--- /dev/null
+++ b/test/test.tsl
@@ -0,0 +1,21 @@
+// NameSpace "DOCX";
+
+WordToPdf();
+
+// Manual smoke test: converts a fixed DOCX to a PDF and echoes the save result.
+function WordToPdf();
+begin
+    alias := "";
+    doc := "D:\\temp\\fangan.docx";
+    output := "D:\\temp\\1.pdf";
+    // doc := "/mnt/d/temp/fontsize.docx";
+    // output := "/mnt/d/temp/1.pdf";
+    docx_to_pdf := new TSDocxToPdf(alias, doc);
+    docx_to_pdf.Font.UseBuiltInFont();
+    docx_to_pdf.Font.SetSubstitutionRules("仿宋", "宋体");
+
+    docx_to_pdf.Transform();
+    err := docx_to_pdf.SaveToFile(alias, output);
+    echo "SaveToFile::\t", "err := ", format("%x", err), "\toutput_file := ", output, "\n";
+end;
+
diff --git a/utils/TSColorToolKit.tsf b/utils/TSColorToolKit.tsf
new file mode 100644
index 0000000..ade7143
--- /dev/null
+++ b/utils/TSColorToolKit.tsf
@@ -0,0 +1,41 @@
+unit TSColorToolKit;
+// Colour helpers: conversion of hex colour values to RGB components.
+interface
+    function HexToRGB(hex);
+
+implementation
+
+    // Value (0..15) of one hexadecimal digit; raises on any other character.
+    function HexDigitValue(ch);
+    begin
+        idx := pos(ch, "0123456789abcdef");
+        if idx = 0 then idx := pos(ch, "0123456789ABCDEF");
+        if idx = 0 then raise "Invalid hexadecimal parameter.";
+        return idx - 1;
+    end;
+
+    // Converts a colour to array(r, g, b) with each component in 0..255.
+    // hex: a 6-digit hex string ("RRGGBB"), the same with a leading "#",
+    //      or a number, which is formatted as 6 hex digits.
+    // Raises "Invalid hexadecimal parameter" for any other shape or digit.
+    function HexToRGB(hex);
+    begin
+        // Zero-pad numbers so that e.g. 0x00FF00 keeps its leading "00".
+        hex_string := ifnumber(hex) ? format("%.6x", hex) : hex;
+        if length(hex_string) = 7 then
+        begin
+            if hex_string[1] <> "#" then raise "Invalid hexadecimal parameter.";
+            // strings are 1-based: the digits start at index 2
+            hex_string := hex_string[2:];
+        end
+        if length(hex_string) <> 6 then raise "Invalid hexadecimal parameter";
+        // Digits are decoded explicitly instead of through eval(), so
+        // malformed text coming from a document can only raise, never run.
+        r := HexDigitValue(hex_string[1]) * 16 + HexDigitValue(hex_string[2]);
+        g := HexDigitValue(hex_string[3]) * 16 + HexDigitValue(hex_string[4]);
+        b := HexDigitValue(hex_string[5]) * 16 + HexDigitValue(hex_string[6]);
+        return array(r, g, b);
+    end;
+
+end.
+
diff --git a/utils/TSFontWare.tsf b/utils/TSFontWare.tsf
new file mode 100644
index 0000000..204ce32
--- /dev/null
+++ b/utils/TSFontWare.tsf
@@ -0,0 +1,130 @@
+// TSFontWare: resolves DOCX font names to PDF font objects, either from the
+// PDF library's built-in fonts or from font files found on disk.
+type TSFontWare = class
+public
+    function Create(pdf);
+    function Init();
+    function UseBuiltInFont();
+    function SetSubstitutionRules(source, target);
+    function GetFont(name, bold, italic);
+
+private
+    function GetExternalFont(name, bold, italic);
+    function GetBuiltInFont(name, bold, italic);
+
+private
+    pdf_;
+    is_linux_; // whether running on linux
+
+    use_built_in_font_; // whether to use the built-in fonts
+    substitution_rules_; // substitution rules
+    external_reference_;
+    external_font_cache_;
+end;
+
+
+// Binds the ware to `pdf` with the default substitution rules. Font files
+// are not scanned here; Init() fills the external font table.
+function TSFontWare.Create(pdf);
+begin
+    pdf_ := pdf;
+    is_linux_ := true;
+    use_built_in_font_ := false;
+    substitution_rules_ := array("宋体": "SimSun", "黑体": "SimHei");
+    external_reference_ := array();
+    external_font_cache_ := array();
+    // self.Init();
+end;
+
+// Makes GetFont resolve names through the built-in fonts.
+function TSFontWare.UseBuiltInFont();
+begin
+    use_built_in_font_ := true;
+end;
+
+// Scans <exe dir>/funcext/WordToPdf/fonts for *.tt* files and records each
+// one in external_reference_ under its file name without the extension.
+function TSFontWare.Init();
+begin
+{$IFDEF LINUX}
+    is_linux_ := true;
+{$ELSE}
+    is_linux_ := false;
+{$ENDIF}
+    separator := is_linux_ ? "/" : "\\";
+    path := extractFileDir(sysExecName()) + separator + "funcext" + separator + "WordToPdf" + separator + "fonts" + separator;
+    files := fileList("", path + "*.tt*");
+    for i:=0 to length(files)-1 do
+    begin
+        filename := files[i]["FileName"];
+        ext := extractFileExt(filename);
+        // Strip the extension by length: searching for it with pos() cuts at
+        // the first match, which is wrong for names such as "a.ttf.ttf".
+        stem := filename[:length(filename) - length(ext)];
+        name := is_linux_ ? stem : ansiToUTF8(stem);
+        external_reference_[name] := array("ext": ext, "path": path + filename);
+    end
+end;
+
+// Returns the PDF font loaded from the external file registered for `name`
+// (default "等线"), or nil when the name is unknown or loading fails.
+// Loaded fonts are cached by name. bold and italic are currently ignored.
+function TSFontWare.GetExternalFont(name, bold, italic);
+begin
+    if ifnil(name) or name = '' then name := "等线";
+    if not ifnil(external_font_cache_[name]) then return external_font_cache_[name];
+    value := external_reference_[name];
+    if ifnil(value) then return nil;
+    // if ifnil(value) then raise name + " is unsupported font.";
+    if value["ext"] = ".ttf" then
+        font_name := pdf_.LoadTTFontFromFile("", value["path"], true);
+    else if value["ext"] = ".ttc" then
+        font_name := pdf_.LoadTTFontFromFile2("", value["path"], 0, true);
+    // if not ifString(font_name) then raise "Load font error : " + format("%x", font_name);
+    if not ifString(font_name) then return nil;
+    font := pdf_.GetFont(font_name, "UTF-8");
+    external_font_cache_[name] := font;
+    return font;
+end;
+
+// Dispatches to the built-in or the external lookup.
+function TSFontWare.GetFont(name, bold, italic);
+begin
+    return use_built_in_font_ ? self.GetBuiltInFont(name, bold, italic) : self.GetExternalFont(name, bold, italic);
+end;
+
+// Returns a built-in PDF font for `name`. The name is mapped through the
+// substitution rules, following chains such as "仿宋" -> "宋体" -> "SimSun";
+// names without a rule fall back to "SimSun". bold / italic select the
+// ",Bold", ",Italic" or ",BoldItalic" variant.
+function TSFontWare.GetBuiltInFont(name, bold, italic);
+begin
+    // Look the rule up by the requested name, not by the literal key "name".
+    font_name := nil;
+    if not ifnil(name) then font_name := substitution_rules_[name];
+    hops := 0;
+    while not ifnil(font_name) do
+    begin
+        next_name := substitution_rules_[font_name];
+        // stop at the end of the chain; the hop limit guards against cycles
+        if ifnil(next_name) then break;
+        if hops >= length(substitution_rules_) then break;
+        font_name := next_name;
+        hops++;
+    end
+    if ifnil(font_name) then font_name := "SimSun";
+    if bold and italic then
+        font_name += ",BoldItalic";
+    else if bold then
+        font_name += ",Bold";
+    else if italic then
+        font_name += ",Italic";
+    font := pdf_.GetFont(font_name, "GBK-EUC-H");
+    return font;
+end;
+
+// Registers (or replaces) the rule that maps font `source` to `target`.
+function TSFontWare.SetSubstitutionRules(source, target);
+begin
+    substitution_rules_[source] := target;
+end;
diff --git a/utils/TSParagraphWare.tsf b/utils/TSParagraphWare.tsf
new file mode 100644
index 0000000..0842d26
--- /dev/null
+++ b/utils/TSParagraphWare.tsf
@@ -0,0 +1,132 @@
+// TSParagraphWare: merges style, paragraph and run properties for one
+// paragraph and splits the text of its runs into per-character words.
+type TSParagraphWare = class
+public
+    function Create(components, styles, paragraph);
+    function Do();
+    function GetWords();
+
+    property Paragraph read ReadParagraph;
+    function ReadParagraph();
+
+private
+    function SetPPr(ppr);
+    function SetRPr(rpr, ppr);
+    function SetRPrByStyleId(rpr, style_id);
+    function SetPPrByStyleId(ppr, style_id);
+
+private
+    docx_components_;
+    styles_;
+    paragraph_;
+    words_;
+end;
+
+// Stores the collaborators and starts with an empty word list.
+function TSParagraphWare.Create(components, styles, paragraph);
+begin
+    docx_components_ := components;
+    styles_ := styles;
+    paragraph_ := paragraph;
+    words_ := array();
+end;
+
+// Merges the paragraph and run properties, then appends one
+// array(character, rpr) entry to words_ per character of every run. Each
+// character is converted from UTF-8 to ANSI; rpr is an RPrUnitDecorator.
+function TSParagraphWare.Do();
+begin
+    self.SetPPr(paragraph_.PPr); // merge the style-id properties with ppr
+    rs := paragraph_.Rs();
+    for i:=0 to length(rs)-1 do
+    begin
+        r := rs[i];
+        self.SetRPr(r.RPr, paragraph_.PPr); // merge rpr with ppr and the style-id properties
+        rpr := new RPrUnitDecorator(r.RPr);
+        text := r.T.Text;
+        // runs without text contribute no words
+        if ifString(text) then
+        begin
+            idx := 1;
+            text_len := length(text);
+            while idx <= text_len do
+            begin
+                // The UTF-8 lead byte gives the sequence length (1-4 bytes);
+                // assuming 3 bytes for every non-ASCII character would split
+                // 2- and 4-byte characters in the wrong place.
+                lead := ord(text[idx]);
+                char_len := 1;
+                if lead >= 192 then char_len := 2;
+                if lead >= 224 then char_len := 3;
+                if lead >= 240 then char_len := 4;
+                end_idx := idx + char_len - 1;
+                if end_idx > text_len then end_idx := text_len; // truncated sequence
+                c := text[idx : end_idx];
+                idx := end_idx + 1;
+                words_[length(words_)] := array(utf8ToAnsi(c), rpr);
+            end
+        end
+    end
+    paragraph_.PPr := new PPrUnitDecorator(paragraph_.PPr);
+end;
+
+// Copies the style chain of style_id into ppr, base styles first (presumably
+// so that derived styles win). Does nothing when the style is not an object.
+function TSParagraphWare.SetPPrByStyleId(ppr, style_id);
+begin
+    style := styles_.StyleId(style_id);
+    if ifObj(style) then
+    begin
+        based_on := style.BasedOn.Val;
+        self.SetPPrByStyleId(ppr, based_on);
+        ppr.Copy(style.PPr);
+        ppr.Rpr.Copy(style.RPr);
+    end
+end;
+
+// Rebuilds ppr as: the style chain of its PStyle, then ppr's own values.
+function TSParagraphWare.SetPPr(ppr);
+begin
+    new_ppr := new PPr();
+    style_id := ppr.PStyle.Val;
+    self.SetPPrByStyleId(new_ppr, style_id);
+    new_ppr.Copy(ppr);
+    ppr.Copy(new_ppr);
+end;
+
+// Copies the run properties of the style chain of style_id into rpr, base styles first.
+function TSParagraphWare.SetRPrByStyleId(rpr, style_id);
+begin
+    style := styles_.StyleId(style_id);
+    if ifObj(style) then
+    begin
+        based_on := style.BasedOn.Val;
+        self.SetRPrByStyleId(rpr, based_on);
+        rpr.Copy(style.RPr);
+    end
+end;
+
+// Rebuilds rpr by copying, in this order: the paragraph's run properties,
+// the style chain of its RStyle, and finally rpr's own values.
+function TSParagraphWare.SetRPr(rpr, ppr);
+begin
+    // rpr inherits from ppr first, then from the style, and finally from itself
+    new_rpr := new RPr();
+    style_id := rpr.RStyle.Val;
+    new_rpr.Copy(ppr.RPr);
+    self.SetRPrByStyleId(new_rpr, style_id);
+    new_rpr.Copy(rpr);
+    rpr.Copy(new_rpr);
+end;
+
+// Returns the word list built by Do().
+function TSParagraphWare.GetWords();
+begin
+    return words_;
+end;
+
+// Getter behind the Paragraph property: returns paragraph_.
+function TSParagraphWare.ReadParagraph();
+begin
+    return paragraph_;
+end