简单段落

This commit is contained in:
csh 2024-05-20 10:02:06 +08:00
parent 5b7abadba1
commit 61001c9a46
16 changed files with 671 additions and 21 deletions

23
.gitignore vendored
View File

@ -1,21 +1,2 @@
# ---> Vim *.swp
# Swap *.swo
[._]*.s[a-v][a-z]
!*.svg # comment out if you don't need vector files
[._]*.sw[a-p]
[._]s[a-rt-v][a-z]
[._]ss[a-gi-z]
[._]sw[a-p]
# Session
Session.vim
Sessionx.vim
# Temporary
.netrwhist
*~
# Auto-generated tag files
tags
# Persistent undo
[._]*.un~

418
TSDocxToPdf.tsf Normal file
View File

@ -0,0 +1,418 @@
// Converts a DOCX document into a PDF: opens the document, walks the body
// elements and writes them onto PDF pages, then saves the result.
type TSDocxToPdf = class
uses TSColorToolKit, TSPdfEnumerations;
public
function Create(alias, file);
function SaveToFile(alias, file);
function Transform();
property Font read ReadFont;
function ReadFont();
private
function Init();
function InitSectPr();
function InitEncoder();
function InitPoint();
function InitStyles();
function TransformParagraph(paragraph);
function TransformDrawing(drawing);
function TransformTable(table);
function AddPage();
function CheckAndAddPage(offset);
function ResetCoordinates();
function CalcPagragraphPt(size, line);
function FloatN(R, N);
function ParagraphWordsToLine(ware);
function GetElementType(element);
function SetPageItalic(page, x, y); // simulated italic [deprecated]
// for testing only
function PrintGrid();
private
docx_components_; // Components@DOCX
styles_adapter_; // StylesAdapter@Docx
pdf_; // PdfFile
point_; // Point
sect_; // section properties wrapped in SectPrUnitDecorator (set in InitSectPr)
base_size_; // base font size
font_ware_; // TSFontWare bound to pdf_ (set in Init)
current_page_; // current page
end;
type Point = class // tracks the current drawing position
public
X;
Y;
end;
// Constructor: opens the DOCX file and prepares the converter.
//   alias - alias passed through to Components.OpenFile
//   file  - path of the DOCX file to open
// Raises a string error when OpenFile reports a failure; the message returned
// by OpenFile is appended when it is a non-empty string.
function TSDocxToPdf.Create(alias, file);
begin
    NameSpace "DOCX";
    docx_components_ := new Components();
    [err, msg] := docx_components_.OpenFile(alias, file, nil);
    if err then
    begin
        reason := "Create object 'TSDocxFile' failed.";
        if ifString(msg) then
        begin
            if msg <> "" then reason += " " + msg;
        end
        raise reason;
    end
    self.Init();
end;
// Creates the PDF document and initialises styles, encoders, section
// properties and the drawing position, then adds the first page.
function TSDocxToPdf.Init();
begin
pdf_ := new PdfFile();
self.InitStyles();
self.InitEncoder();
self.InitSectPr();
self.InitPoint();
// InitSectPr and InitPoint must run before AddPage: AddPage reads sect_ and,
// through PrintGrid, point_.
self.AddPage();
// Base font size is derived from the document grid line pitch divided by 1.38.
// NOTE(review): the meaning of the second argument to integer() is not visible here — confirm.
base_size_ := integer(sect_.DocGrid.LinePitch / 1.38, 2);
font_ware_ := new TSFontWare(pdf_);
end;
// Enables the CNS (Simplified Chinese) fonts and encodings on the PDF document.
// The UTF encodings call is left disabled.
function TSDocxToPdf.InitEncoder();
begin
pdf_.UseCNSFonts();
pdf_.UseCNSEncodings();
// pdf_.UseUTFEncodings();
end;
// Deserialises the document and stores its section properties in sect_,
// wrapped in SectPrUnitDecorator (the decorator performs the value conversion
// used by the layout code).
// The page orientation is defaulted to "portrait" only when the document does
// not specify one; an explicit value is kept as-is. The previous expression
// replaced any present value with "portrait" and a missing one with "landscape".
// NOTE(review): assumes Orient is nil or "" when the attribute is absent — confirm
// against the decorator.
function TSDocxToPdf.InitSectPr();
begin
    document := docx_components_.Document;
    document.Deserialize();
    sect_ := new SectPrUnitDecorator(document.Body.SectPr);
    orient := sect_.PgSz.Orient;
    if ifnil(orient) or orient = "" then sect_.PgSz.Orient := "portrait";
end;
// Creates the position tracker and moves it to the page start position.
function TSDocxToPdf.InitPoint();
begin
// The starting y position should be max(top, header) + line spacing.
// ResetCoordinates currently uses the top margin only.
point_ := new Point();
self.ResetCoordinates();
end;
// Deserialises the document's style part and wraps it in a StylesAdapter,
// which is later handed to each TSParagraphWare.
function TSDocxToPdf.InitStyles();
begin
    style_part := docx_components_.Styles;
    style_part.Deserialize();
    styles_adapter_ := new StylesAdapter(style_part);
end;
// Appends a page sized from the section's page size, makes it the current
// page, and draws the debug grid on it.
function TSDocxToPdf.AddPage();
begin
current_page_ := pdf_.AddPage();
current_page_.SetWidth(sect_.PgSz.W);
current_page_.SetHeight(sect_.PgSz.H);
// PrintGrid reads point_, so point_ must already exist when a page is added.
self.PrintGrid();
end;
// Moves the current position to the page start: x at the left margin,
// y at page height minus the top margin.
function TSDocxToPdf.ResetCoordinates();
begin
point_.X := sect_.PgMar.Left;
point_.Y := sect_.PgSz.H - sect_.PgMar.Top;
end;
/// Writes the PDF to `file` and returns the err value reported by pdf_.SaveToFile.
function TSDocxToPdf.SaveToFile(alias, file);
begin
    err := pdf_.SaveToFile(alias, file);
    return err;
end;
// Debug helper (for testing only): draws horizontal grid lines on the current
// page, one per DocGrid line pitch starting at point_.Y and stopping at the
// bottom margin, then outlines the margin box.
function TSDocxToPdf.PrintGrid();
begin
    canvas := current_page_;

    // Grid lines: y = point_.Y - k * LinePitch while y stays above the bottom margin.
    k := 0;
    grid_y := point_.Y - k * sect_.DocGrid.LinePitch;
    while grid_y > sect_.PgMar.Bottom do
    begin
        canvas.SetLineWidth(0.05);
        canvas.SetGrayStroke(0.75);
        canvas.MoveTo(sect_.PgMar.Left, grid_y);
        canvas.LineTo(sect_.PgSz.W- sect_.PgMar.Right, grid_y);
        canvas.Stroke();
        k++;
        grid_y := point_.Y - k * sect_.DocGrid.LinePitch;
    end

    // Margin box corners.
    x_left := sect_.PgMar.Left;
    x_right := sect_.PgSz.W - sect_.PgMar.Right;
    y_top := sect_.PgSz.H - sect_.PgMar.Top;
    y_bottom := sect_.PgMar.Bottom;

    canvas.SetLineWidth(0.05);
    canvas.SetGrayStroke(0.5);

    // top edge
    canvas.MoveTo(x_left, y_top);
    canvas.LineTo(x_right, y_top);
    canvas.Stroke();
    // left edge
    canvas.MoveTo(x_left, y_top);
    canvas.LineTo(x_left, y_bottom);
    canvas.Stroke();
    // right edge
    canvas.MoveTo(x_right, y_top);
    canvas.LineTo(x_right, y_bottom);
    canvas.Stroke();
    // bottom edge
    canvas.MoveTo(x_left, y_bottom);
    canvas.LineTo(x_right, y_bottom);
    canvas.Stroke();
end;
// Maps a body element to the type code used by Transform:
// 1 for a paragraph ('p'), 3 for a table ('tbl'), 0 for anything else.
// Code 2 (drawing) is never produced here.
function TSDocxToPdf.GetElementType(element);
begin
    tag := element.LocalName;
    if tag = 'p' then
        type_code := 1;
    else if tag = 'tbl' then
        type_code := 3;
    else
        type_code := 0;
    return type_code;
end;
// Walks every element of the document body in order and dispatches it to the
// matching Transform* method according to GetElementType. Elements with an
// unrecognised type code are skipped. Prints the element index as a trace.
function TSDocxToPdf.Transform();
begin
    body_elements := docx_components_.Document.Body.Elements();
    last_index := length(body_elements) - 1;
    for i := 0 to last_index do
    begin
        println("i = {}", i);
        element := body_elements[i];
        case self.GetElementType(element) of
            1: self.TransformParagraph(element); // plain paragraph
            2: self.TransformDrawing(element);   // picture paragraph
            3: self.TransformTable(element);     // table
        end;
    end
end;
// Starts a new page when the current position, lowered by `offset`, would
// reach or pass the bottom margin.
//   offset - extra vertical space required below point_.Y (nil means 0)
// Returns true when a page was added (point_ is then at the page start),
// false otherwise.
function TSDocxToPdf.CheckAndAddPage(offset);
begin
    if ifnil(offset) then offset := 0;
    if point_.Y - offset > sect_.PgMar.Bottom then return false;
    // Reset the position before adding the page: AddPage draws the debug grid
    // starting at point_.Y, so resetting afterwards made it draw from the stale
    // position of the previous page and required a second PrintGrid call, which
    // duplicated the margin box.
    self.ResetCoordinates();
    self.AddPage();
    return true;
end;
// Lays out the words of a paragraph into lines and pages.
//   ware - paragraph middleware exposing GetWords() (entries of [word, rpr])
//          and Paragraph (read for PPr.Ind.FirstLine, PPr.AutoSpaceDN,
//          PPr.AutoSpaceDE and PPr.Spacing.Line)
// Returns an array indexed like the word list; each entry holds "page",
// "font", "word", "rpr", "x" and "y". Advances point_.Y past the paragraph
// and may add pages through CheckAndAddPage.
function TSDocxToPdf.ParagraphWordsToLine(ware);
begin
self.CheckAndAddPage();
page := current_page_;
lines := array();
// The first line starts at the left position plus the first-line indent.
x := point_.X + ware.Paragraph.PPr.Ind.FirstLine;
y := point_.Y;
max_size := 0;
i := 0;
// Index of the first word of the line currently being filled.
begin_index := 0;
words := ware.GetWords();
while i <= length(words)-1 do
begin
[word, rpr] := words[i];
// Track the largest font size on the current line; it drives the line height.
if rpr.Sz.Val > max_size then
max_size := rpr.Sz.Val;
font_obj := font_ware_.GetFont(rpr.RFonts.EastAsia, rpr.B, rpr.I);
lines[i]["page"] := page;
lines[i]["font"] := font_obj;
lines[i]["word"] := word;
lines[i]["rpr"] := rpr;
lines[i]["x"] := x;
page.SetFontAndSize(font_obj, rpr.Sz.Val);
w := page.TextWidth(word);
// println("word = {}, x = {}, w = {}, sz = {}", word, x, w, rpr.Sz.Val);
x += w;
// AutoSpaceDN: when a length-1 word and a word of length >= 2 are adjacent and
// the length-1 one is a digit (codes 48..57), add 0.27 * font size of spacing.
// NOTE(review): presumably length 1 means an ASCII character and length >= 2 a
// multi-byte (e.g. CJK) character after utf8ToAnsi — confirm.
if ware.Paragraph.PPr.AutoSpaceDN and i < length(words)-1 then
begin
current_len := length(word);
next_len := length(words[i+1][0]);
if (current_len = 1 and next_len >= 2) or (current_len >= 2 and next_len = 1) then
begin
cord := current_len = 1 ? ord(word) : ord(words[i+1][0]);
if cord >= 48 and cord <= 57 then
x += rpr.Sz.Val * 0.27;
end
end
// AutoSpaceDE: same rule as above for ASCII letters (codes 97..122 and 65..90).
if ware.Paragraph.PPr.AutoSpaceDE and i < length(words)-1 then
begin
current_len := length(word);
next_len := length(words[i+1][0]);
if (current_len = 1 and next_len >= 2) or (current_len >= 2 and next_len = 1) then
begin
cord := current_len = 1 ? ord(word) : ord(words[i+1][0]);
if (cord >= 97 and cord <= 122) or (cord >= 65 and cord <= 90) then
x += rpr.Sz.Val * 0.27;
end
end
line_pt := self.CalcPagragraphPt(max_size, ware.Paragraph.PPr.Spacing.Line);
// Half of the space left over between the line height and the tallest glyph.
offset := (line_pt - max_size) / 2;
if self.CheckAndAddPage(line_pt) then // page break: x unchanged, y changes
begin
// Restart the current line on the new page from its first word.
// NOTE(review): the first-line indent is not re-applied here — confirm intended.
page := current_page_;
x := point_.X;
y := point_.Y;
i := begin_index;
max_size := 0;
continue;
end
if x >= sect_.PgSz.W - sect_.PgMar.Right then // line break
begin
// Assign the y position to the words begin_index..i-1; word i overflowed and
// is laid out again as the first word of the next line (i is not advanced).
// NOTE(review): a single word wider than the text area would never advance i — confirm.
y := y - offset - max_size + max_size / 5;
for j:=begin_index to i-1 do
lines[j]["y"] := y;
// reset parameters
begin_index := i;
max_size := 0;
x := point_.X;
y := point_.Y - line_pt;
point_.Y := y;
// page break
if self.CheckAndAddPage() then
begin
page := current_page_;
x := point_.X;
y := point_.Y;
end
end
else begin
i++;
if i > length(words)-1 then // reached the end without a line break
begin
// Flush the last (partial) line and move the position below it.
y := y - offset - max_size + max_size / 5;
for j:=begin_index to length(words)-1 do
lines[j]["y"] := y;
point_.Y -= line_pt;
end
end
end
return lines;
end;
// Simulated italic (marked deprecated in the class declaration): sets a text
// matrix on `page` that shears text horizontally and places it at (x, y).
function TSDocxToPdf.SetPageItalic(page, x, y);
begin
angle := 130;
// NOTE(review): this divides by Pi instead of multiplying, so it is not a
// degrees-to-radians conversion. With angle = 130 the value is about 0.23 and
// tan(rad) is about 0.234 (roughly a 13 degree slant). Changing only the formula
// to angle / 180 * Pi() would give tan(130 deg), about -1.19, so angle would have
// to be retuned together with it.
rad := angle / 180 / Pi();
page.SetTextMatrix(1, 0, tan(rad), 1, x, y);
end;
// Renders one paragraph onto the PDF.
//   paragraph - paragraph element taken from the document body
// The paragraph is resolved through TSParagraphWare (style merging and
// splitting into characters), laid out by ParagraphWordsToLine, and each
// character is then drawn with its own font, size and fill colour.
//
// Paragraph features planned by the original author:
//   1. font size      2. font colour     3. italic          4. bold
//   5. first-line indent                 6. character spacing
//   7. underline      8. strikethrough   9. super/subscript 10. alignment
function TSDocxToPdf.TransformParagraph(paragraph);
begin
    paragraph_ware := new TSParagraphWare(docx_components_, styles_adapter_, paragraph);
    paragraph_ware.Do();
    // Turn every character of the paragraph middleware into a positioned entry.
    lines := self.ParagraphWordsToLine(paragraph_ware);

    // Simulated bold/italic stays switched off, as before: these flags were
    // never assigned, so the bold branch below never ran. Bold and italic are
    // already passed to font selection (GetFont) during layout. The flags are
    // now explicit and the branch uses the variables that actually exist here
    // (it previously referenced an unassigned `font` and treated the string
    // `word` as an object).
    draw_bold := false;
    draw_italic := false;

    // Write the entries to the PDF.
    for i:=0 to length(lines)-1 do
    begin
        line := lines[i];
        word := line["word"];
        rpr := line["rpr"];
        font := line["font"];
        page := line["page"];
        x := line["x"];
        y := line["y"];

        // Default to black. WordprocessingML allows the colour value "auto",
        // which is not a hex triplet and would make HexToRGB raise.
        [r, g, b] := array(0, 0, 0);
        color := rpr.Color.Val;
        if color then
        begin
            is_auto := false;
            if ifString(color) then
            begin
                if color = "auto" then is_auto := true;
            end
            if not is_auto then [r, g, b] := TSColorToolKit.HexToRGB(color);
        end
        page.SetRGBFill(r / 255, g / 255, b / 255);
        page.SetFontAndSize(font, rpr.Sz.Val);

        if draw_bold then
        begin
            // Simulated bold: redraw the text at slightly varying font sizes,
            // from size / 1.03 up to size * 1.03 in steps of 1% of the size.
            // (An earlier attempt that redrew the text at small x/y offsets was
            // abandoned by the author because the result looked poor.)
            multi := 1.030;
            size := rpr.Sz.Val;
            target_size := size * multi;
            step := size * 0.01;
            size /= multi;
            while size <= target_size do
            begin
                page.SetFontAndSize(font, size);
                page.beginText();
                if draw_italic then self.SetPageItalic(page, x, y);
                page.TextOut(x, y, word);
                page.endText();
                size += step;
            end
        end
        else begin
            page.beginText();
            page.TextOut(x, y, word);
            page.endText();
        end
        // Restore the default fill colour for whatever is drawn next.
        page.SetRGBFill(0, 0, 0);
    end
end;
// Placeholder for rendering a drawing (picture) element; not implemented yet.
function TSDocxToPdf.TransformDrawing(drawing);
begin
end;
// Placeholder for rendering a table element; not implemented yet.
function TSDocxToPdf.TransformTable(table);
begin
end;
// Returns the height of one text line snapped to the document grid:
// LinePitch multiplied by the number of grid rows needed for a font of `size`
// (size / base_size_, rounded up).
//   size - largest font size on the line
//   line - paragraph line-spacing value; accepted for interface compatibility
//          but not used. The previous body computed a rounded line / 12 factor
//          and then discarded it; that dead computation has been removed.
//          TODO(review): decide whether the spacing factor should scale the result.
function TSDocxToPdf.CalcPagragraphPt(size, line);
begin
    multi := Ceil(size / base_size_);
    return sect_.DocGrid.LinePitch * multi;
end;
// Rounds r to n decimal places: scales by 10^n, rounds, and scales back.
function TSDocxToPdf.FloatN(r, n);
begin
    scaled := Round(r * IntPower(10, n));
    return scaled / IntPower(10, n);
end;
// Getter behind the Font property: returns the TSFontWare instance so callers
// can configure font handling.
function TSDocxToPdf.ReadFont();
begin
return font_ware_;
end;

BIN
fonts/仿宋.ttf Normal file

Binary file not shown.

BIN
fonts/华文楷体.ttf Normal file

Binary file not shown.

BIN
fonts/宋体.ttc Normal file

Binary file not shown.

BIN
fonts/微软雅黑-bold.ttc Normal file

Binary file not shown.

Binary file not shown.

BIN
fonts/微软雅黑.ttc Normal file

Binary file not shown.

BIN
fonts/等线-bold.ttf Normal file

Binary file not shown.

BIN
fonts/等线-light.ttf Normal file

Binary file not shown.

BIN
fonts/等线.ttf Normal file

Binary file not shown.

BIN
fonts/黑体.ttf Normal file

Binary file not shown.

20
test/test.tsl Normal file
View File

@ -0,0 +1,20 @@
// NameSpace "DOCX";
// Manual smoke test: converts one DOCX file to PDF and prints the save result.
WordToPdf();

// Converts a hard-coded DOCX file with the built-in fonts and echoes the
// error value returned by SaveToFile together with the output path.
function WordToPdf();
begin
    alias := "";
    source_docx := "D:\\temp\\fangan.docx";
    target_pdf := "D:\\temp\\1.pdf";
    // Alternative paths (disabled):
    // source_docx := "/mnt/d/temp/fontsize.docx";
    // target_pdf := "/mnt/d/temp/1.pdf";
    converter := new TSDocxToPdf(alias, source_docx);
    fonts := converter.Font;
    fonts.UseBuiltInFont();
    fonts.SetSubstitutionRules("仿宋", "宋体");
    converter.Transform();
    err := converter.SaveToFile(alias, target_pdf);
    echo "SaveToFile::\t", "err := ", format("%x", err), "\toutput_file := ", target_pdf, "\n";
end;

23
utils/TSColorToolKit.tsf Normal file
View File

@ -0,0 +1,23 @@
// Colour helpers.
unit TSColorToolKit;
interface
function HexToRGB(hex);
implementation

// Converts a hexadecimal colour into its red, green and blue components.
//   hex - a number (e.g. 0xFF8000), or a string "RRGGBB" or "#RRGGBB"
//         (upper- or lower-case digits)
// Returns array(r, g, b) with each component in 0..255.
// Raises a string error when the input is not a 6-digit hexadecimal colour.
function HexToRGB(hex);
begin
    if ifnumber(hex) then
    begin
        hex_string := format("%x", hex);
        // Left-pad so that small values such as 0xFF still yield six digits.
        while length(hex_string) < 6 do
            hex_string := "0" + hex_string;
    end
    else
        hex_string := hex;
    if not ifString(hex_string) then raise "Invalid hexadecimal parameter.";
    if length(hex_string) = 7 then
    begin
        if hex_string[1] <> "#" then raise "Invalid hexadecimal parameter.";
        // Strings are 1-based: drop the leading "#" by starting at index 2.
        hex_string := hex_string[2:];
    end
    if length(hex_string) <> 6 then raise "Invalid hexadecimal parameter";
    // Parse the digits directly instead of eval-ing caller-supplied text.
    rgb := array(0, 0, 0);
    for channel := 0 to 2 do
    begin
        value := 0;
        for k := 1 to 2 do
        begin
            code := ord(hex_string[channel * 2 + k]);
            if (code >= 48) and (code <= 57) then
                digit := code - 48;   // '0'..'9'
            else if (code >= 65) and (code <= 70) then
                digit := code - 55;   // 'A'..'F'
            else if (code >= 97) and (code <= 102) then
                digit := code - 87;   // 'a'..'f'
            else
                raise "Invalid hexadecimal parameter";
            value := value * 16 + digit;
        end
        rgb[channel] := value;
    end
    return rgb;
end;
end.

100
utils/TSFontWare.tsf Normal file
View File

@ -0,0 +1,100 @@
// Font provider for the PDF writer: returns either built-in fonts selected
// through substitution rules or fonts loaded from external TrueType files.
type TSFontWare = class
public
function Create(pdf);
function Init();
function UseBuiltInFont();
function SetSubstitutionRules(source, target);
function GetFont(name, bold, italic);
private
function GetExternalFont(name, bold, italic);
function GetBuiltInFont(name, bold, italic);
private
pdf_; // PDF document the fonts are loaded into
is_linux_; // whether running on Linux
use_built_in_font_; // whether to use the built-in fonts
substitution_rules_; // substitution rules (source font name -> target font name)
external_reference_; // font name -> array("ext", "path") of discovered font files (filled by Init)
external_font_cache_; // font name -> font already loaded from file
end;
// Constructor: stores the PDF document and sets defaults (external fonts,
// two default substitution rules, empty font tables).
function TSFontWare.Create(pdf);
begin
pdf_ := pdf;
// Defaults to true here; Init() overwrites it from the LINUX compile-time define.
is_linux_ := true;
use_built_in_font_ := false;
substitution_rules_ := array("宋体": "SimSun", "黑体": "SimHei");
external_reference_ := array();
external_font_cache_ := array();
// Init() is not called automatically, so external_reference_ stays empty
// until the caller invokes Init().
// self.Init();
end;
// Switches GetFont to the built-in fonts instead of external font files.
function TSFontWare.UseBuiltInFont();
begin
use_built_in_font_ := true;
end;
// Scans <executable dir>/funcext/WordToPdf/fonts/ for *.tt* files and records
// each one in external_reference_ under its file name without extension.
// Also sets is_linux_ from the LINUX compile-time define.
function TSFontWare.Init();
begin
{$IFDEF LINUX}
    is_linux_ := true;
{$ELSE}
    is_linux_ := false;
{$ENDIF}
    separator := is_linux_ ? "/" : "\\";
    path := extractFileDir(sysExecName()) + separator + "funcext" + separator + "WordToPdf" + separator + "fonts" + separator;
    files := fileList("", path + "*.tt*");
    for i:=0 to length(files)-1 do
    begin
        filename := files[i]["FileName"];
        ext := extractFileExt(filename);
        // Cut the extension off the end by length. The previous code stored the
        // result of pos() in a local also named `pos` (reusing the built-in's name
        // inside the loop) and cut at the first occurrence of the extension text,
        // which is wrong when that text also appears earlier in the file name.
        stem := filename[:length(filename) - length(ext)];
        // On non-Linux systems the file name is converted from ANSI to UTF-8.
        name := is_linux_ ? stem : ansiToUTF8(stem);
        external_reference_[name] := array("ext": ext, "path": path + filename);
    end
end;
// Returns the font loaded from the external font file registered under `name`
// (default "等线" when name is nil or empty), loading and caching it on first use.
// Returns nil when no file is registered for the name or loading fails.
// `bold` and `italic` are not used: the cache and lookup are keyed by name only.
function TSFontWare.GetExternalFont(name, bold, italic);
begin
if ifnil(name) or name = '' then name := "等线";
if not ifnil(external_font_cache_[name]) then return external_font_cache_[name];
value := external_reference_[name];
if ifnil(value) then return nil;
// if ifnil(value) then raise name + " is unsupported font.";
// .ttf files hold a single font; for .ttc collections index 0 is loaded.
// Any other extension leaves font_name unassigned and falls through to the nil return below.
if value["ext"] = ".ttf" then
font_name := pdf_.LoadTTFontFromFile("", value["path"], true);
else if value["ext"] = ".ttc" then
font_name := pdf_.LoadTTFontFromFile2("", value["path"], 0, true);
// if not ifString(font_name) then raise "Load font error : " + format("%x", font_name);
// A non-string result is treated as a load failure.
if not ifString(font_name) then return nil;
font := pdf_.GetFont(font_name, "UTF-8");
external_font_cache_[name] := font;
return font;
end;
// Returns a font for the given name and style, taken from the built-in fonts
// when UseBuiltInFont() was called and from external font files otherwise.
function TSFontWare.GetFont(name, bold, italic);
begin
    if use_built_in_font_ then
        return self.GetBuiltInFont(name, bold, italic);
    return self.GetExternalFont(name, bold, italic);
end;
// Returns a built-in font for `name` with the requested style.
//   name   - font name as found in the document; mapped through the
//            substitution rules, falling back to "SimSun" when no rule matches
//   bold   - truthy to select the bold variant
//   italic - truthy to select the italic variant
// The font is requested with the "GBK-EUC-H" encoding.
function TSFontWare.GetBuiltInFont(name, bold, italic);
begin
    // Look the rule up by the value of `name`. The previous code used the
    // literal key "name", so no rule ever matched and every font became SimSun.
    font_name := nil;
    if not ifnil(name) then font_name := substitution_rules_[name];
    // Follow chained rules (e.g. "仿宋" -> "宋体" -> "SimSun") so that user rules
    // may target another rule's source name; bounded to guard against cycles.
    // NOTE(review): rule keys must use the same text encoding as the names read
    // from the document — confirm.
    hops := 0;
    while (not ifnil(font_name)) and (hops < 8) do
    begin
        next_name := substitution_rules_[font_name];
        if ifnil(next_name) then break;
        font_name := next_name;
        hops++;
    end
    if ifnil(font_name) then font_name := "SimSun";
    if bold and italic then
        font_name += ",BoldItalic";
    else if bold then
        font_name += ",Bold";
    else if italic then
        font_name += ",Italic";
    font := pdf_.GetFont(font_name, "GBK-EUC-H");
    return font;
end;
// Adds or replaces one substitution rule: font name `source` maps to `target`.
function TSFontWare.SetSubstitutionRules(source, target);
begin
substitution_rules_[source] := target;
end;

108
utils/TSParagraphWare.tsf Normal file
View File

@ -0,0 +1,108 @@
// Paragraph middleware: merges style, paragraph and run properties and splits
// the paragraph's run text into single characters paired with their run properties.
type TSParagraphWare = class
public
function Create(components, styles, paragraph);
function Do();
function GetWords();
property Paragraph read ReadParagraph;
function ReadParagraph();
private
function SetPPr(ppr);
function SetRPr(rpr, ppr);
function SetRPrByStyleId(rpr, style_id);
function SetPPrByStyleId(ppr, style_id);
private
docx_components_; // document components passed to Create
styles_; // style lookup object (queried through StyleId)
paragraph_; // paragraph being processed
words_; // array of [character, rpr] entries filled by Do()
end;
// Constructor: stores the document components, the style lookup object and
// the paragraph, and starts with an empty word list.
function TSParagraphWare.Create(components, styles, paragraph);
begin
docx_components_ := components;
styles_ := styles;
paragraph_ := paragraph;
words_ := array();
end;
// Resolves the paragraph: merges the style chain into the paragraph
// properties, merges paragraph/style/run properties for every run, and splits
// each run's text into single characters. Every character is appended to
// words_ as array(<character converted with utf8ToAnsi>, <decorated run properties>).
// Finally the paragraph properties are replaced by their PPrUnitDecorator wrapper.
function TSParagraphWare.Do();
begin
    self.SetPPr(paragraph_.PPr); // merge the style-id chain with the paragraph's own ppr
    rs := paragraph_.Rs();
    for i:=0 to length(rs)-1 do
    begin
        r := rs[i];
        self.SetRPr(r.RPr, paragraph_.PPr); // merge rpr with the ppr and style-id properties
        rpr := new RPrUnitDecorator(r.RPr);
        text := r.T.Text;
        total := length(text);
        cursor := 1;
        while cursor <= total do
        begin
            // The UTF-8 lead byte gives the sequence length. The previous code
            // assumed three bytes for every non-ASCII character, which split
            // 2-byte and 4-byte sequences in the wrong place and corrupted the
            // characters that followed.
            lead := ord(text[cursor]);
            if lead >= 240 then
                n_bytes := 4;
            else if lead >= 224 then
                n_bytes := 3;
            else if lead >= 192 then
                n_bytes := 2;
            else
                n_bytes := 1; // ASCII, or a stray continuation byte taken on its own
            last := cursor + n_bytes - 1;
            if last > total then last := total; // truncated sequence at the end of the text
            if n_bytes = 1 then
                c := text[cursor];
            else
                c := text[cursor : last];
            words_[length(words_)] := array(utf8ToAnsi(c), rpr);
            cursor := last + 1;
        end
    end
    paragraph_.PPr := new PPrUnitDecorator(paragraph_.PPr);
end;
// Copies the paragraph and run properties of the style `style_id` into `ppr`,
// after first applying the style it is based on (recursively), so that derived
// styles are copied over their bases. Does nothing when the style id does not
// resolve to an object.
function TSParagraphWare.SetPPrByStyleId(ppr, style_id);
begin
    style_def := styles_.StyleId(style_id);
    if ifObj(style_def) then
    begin
        // Base style first, then this style's own properties on top.
        parent_id := style_def.BasedOn.Val;
        self.SetPPrByStyleId(ppr, parent_id);
        ppr.Copy(style_def.PPr);
        ppr.Rpr.Copy(style_def.RPr);
    end
end;
// Merges the paragraph's style chain with its own properties and writes the
// result back into `ppr`: style properties are applied first, then the
// paragraph's own properties are copied over them.
function TSParagraphWare.SetPPr(ppr);
begin
    merged := new PPr();
    paragraph_style_id := ppr.PStyle.Val;
    self.SetPPrByStyleId(merged, paragraph_style_id);
    merged.Copy(ppr);
    ppr.Copy(merged);
end;
// Copies the run properties of the style `style_id` into `rpr`, after first
// applying the style it is based on (recursively). Does nothing when the
// style id does not resolve to an object.
function TSParagraphWare.SetRPrByStyleId(rpr, style_id);
begin
    style_def := styles_.StyleId(style_id);
    if ifObj(style_def) then
    begin
        // Base style first, then this style's own run properties on top.
        parent_id := style_def.BasedOn.Val;
        self.SetRPrByStyleId(rpr, parent_id);
        rpr.Copy(style_def.RPr);
    end
end;
// Resolves the effective run properties and writes them back into `rpr`.
// Copy order: the paragraph's run properties first, then the run style chain,
// and finally the run's own properties.
function TSParagraphWare.SetRPr(rpr, ppr);
begin
    run_style_id := rpr.RStyle.Val;
    merged := new RPr();
    merged.Copy(ppr.RPr);
    self.SetRPrByStyleId(merged, run_style_id);
    merged.Copy(rpr);
    rpr.Copy(merged);
end;
// Returns the list of [character, rpr] entries built by Do().
function TSParagraphWare.GetWords();
begin
return words_;
end;
// Getter behind the Paragraph property: returns the paragraph being processed.
function TSParagraphWare.ReadParagraph();
begin
return paragraph_;
end