PdfConverter/TSDocxToPdf.tsf

481 lines
17 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// TSDocxToPdf drives the docx-to-PDF conversion: Create opens the docx and
// prepares the PDF document and section modules, Transform lays the sections
// out page by page, and SaveToFile writes the resulting PDF.
type TSDocxToPdf = class
uses TSPdfEnumerations, DocxMLAdapters, DTPModules, DTPAdvancedRanges;
public
function Create(alias: string; file: string);
function Destroy();
function SaveToFile(alias: string; file: string): integer;
function Transform();
function GetCachePath(image_path: string): string;
function GetCurrentXmlFile(): string;
function AddPage(flag: boolean): Page;
function ProcessRealtimeRange();
// docx page-number extension
function UpdateDocxPageNumPages();
function SaveDocxFile();overload;
function SaveDocxFile(alias: string; file: string);overload;
property PdfFile read pdf_;
property DocxComponents read docx_components_module_;
property Font read font_module_;
property PageManager read page_manager_module_;
property Toc read toc_module_;
property Note read note_module_;
property CurrentSect read current_sect_module_;
property CurrentPage read current_page_;
private
function InitDocxComponents(alias: string; file: string);
function InitCachePath(file: string);
function InitPdfEncoder();
function InitSectModule();
function AllocateElementsToSectModule();
function ClassifyCols(var point: Point; cols: Cols);
function RangesSpacing(range: BasicRange);
function SetHdr(type: string);
function SetFtr(type: string);
function TransformP(p: P);
function TransformTbl(tbl: Tbl);
function TransformSdt(sdt: Sdt);
private
pdf_: PdfFile;
docx_components_module_: DocxComponentsModule; // DocxComponentsModule
cache_path_: string; // temporary directory used to hold temporary files
font_module_: FontModule; // font module
sect_module_array_: array of SectModule; // array of page-layout (section) modules
current_sect_module_: SectModule;
current_sect_pr_adapter_: SectPr; // DocxMLAdapters.SectPr
page_manager_module_: PageManagerModule;
current_page_: Page;
toc_module_: TocModule; // table-of-contents module
note_module_: NoteModule; // footnotes / endnotes
xml_file_: string;
even_and_odd_flag_: boolean;
realtime_range_array_: array of BasicRange; // ranges that can only be computed at the very end, e.g. ones containing the total page count
last_range_: BasicRange; // the previous range
// write-back to docx
docx_page_arr_: tableArray;
update_docx_pages_: boolean;
end;
// Constructor: creates the PDF document, opens the docx identified by
// (alias, file), prepares the cache directory and the per-section modules,
// and resets all conversion state.
function TSDocxToPdf.Create(alias: string; file: string);
begin
    // PDF side: document, compression, fonts, encoders.
    pdf_ := new PdfFile();
    pdf_.SetCompressionMode(TSPdfEnumerations.COMP_ALL);
    font_module_ := new DTPModules.FontModule(pdf_);
    {self.}InitPdfEncoder();

    // Docx side: package, cache directory, sections.
    {self.}InitDocxComponents(alias, file);
    {self.}InitCachePath(file);
    {self.}InitSectModule();

    // Paging state.
    current_page_ := nil;
    page_manager_module_ := new DTPModules.PageManagerModule(pdf_);
    toc_module_ := new DTPModules.TocModule();
    xml_file_ := "document.xml";

    // Remember whether the document's settings enable separate even/odd headers.
    docx_settings := docx_components_module_.Settings;
    docx_settings.XmlChildEvenAndOddHeaders.Deserialize();
    if docx_settings.EvenAndOddHeaders then
        even_and_odd_flag_ := true;
    else
        even_and_odd_flag_ := false;

    note_module_ := new DTPModules.NoteModule(self);
    realtime_range_array_ := array();

    // State used when writing page information back to the docx.
    docx_page_arr_ := array();
    update_docx_pages_ := false;
end;
// Destructor: removes the cache directory created by InitCachePath.
// NOTE(review): cache_path_ is only assigned in InitCachePath; if construction
// failed before that point this call receives an unset path - confirm removeDir tolerates that.
function TSDocxToPdf.Destroy();
begin
removeDir("", cache_path_);
end;
// Writes the generated PDF to (alias, file) and returns whatever
// PdfFile.SaveToFile reports.
function TSDocxToPdf.SaveToFile(alias: string; file: string): integer;
begin
    status := pdf_.SaveToFile(alias, file);
    return status;
end;
// Lays out the whole document: walks every section module, opens the section's
// first page, renders its paragraphs / tables / structured-document tags in
// order, and finally resolves the ranges that had to be deferred.
function TSDocxToPdf.Transform();
begin
    for _,sect_module in sect_module_array_ do
    begin
        if current_sect_module_ <> sect_module then
        begin
            // Entering a new section: switch the section state and start its first page.
            current_sect_module_ := sect_module;
            current_sect_pr_adapter_ := new DocxMLAdapters.SectPr(current_sect_module_.SectPr);
            {self.}AddPage(true);
        end
        // Multi-column layout is not implemented yet: ClassifyCols returns before
        // doing any work, so sending sections with more than one column to it
        // dropped all of their content. Until column layout exists, render every
        // section sequentially so that no content is lost.
        elements := sect_module.Elements();
        for _,element in elements do
        begin
            if element.LocalName = "p" then {self.}TransformP(element);
            else if element.LocalName = "tbl" then {self.}TransformTbl(element);
            else if element.LocalName = "sdt" then {self.}TransformSdt(element);
        end
    end
    // Ranges that could not be finalised during layout are processed last.
    {self.}ProcessRealtimeRange();
end;
// Maps image_path to a path inside the cache directory: the cache directory
// followed by the file-name part of image_path.
function TSDocxToPdf.GetCachePath(image_path: string): string;
begin
    base_name := extractFileName(image_path);
    return cache_path_ + base_name;
end;
// Returns the name of the XML part currently being processed: "document.xml"
// by default, or the header/footer relationship target while SetHdr/SetFtr run.
function TSDocxToPdf.GetCurrentXmlFile(): string;
begin
return xml_file_;
end;
// Enables the CNS fonts and CNS encodings on the PDF document.
// The UTF encodings call is left disabled.
function TSDocxToPdf.InitPdfEncoder();
begin
pdf_.UseCNSFonts();
pdf_.UseCNSEncodings();
// pdf_.UseUTFEncodings();
end;
// Creates the docx components module and opens (alias, file) with it.
// Raises "Open file error." when Open reports an error.
function TSDocxToPdf.InitDocxComponents(alias: string; file: string);
begin
    docx_components_module_ := new DTPModules.DocxComponentsModule();
    // Open returns an [error, message] pair; only the error indicator is inspected.
    [open_failed, open_msg] := docx_components_module_.Open(alias, file, nil);
    if open_failed then
    begin
        raise "Open file error.";
    end
end;
// Creates a per-conversion cache directory
//   <executable dir>/funcext/PdfConverter/.cache/<file name>_<timestamp>/
// and stores its path (with trailing slash) in cache_path_.
function TSDocxToPdf.InitCachePath(file: string);
begin
    // Unique leaf name: source file name plus a millisecond timestamp.
    stamp := formatDatetime("YYYYMMDDHHNNSSZZZ", now());
    leaf := format("%s_%s", extractFileName(file), stamp);

    // Shared cache root next to the executable.
    cache_root := format("%s/funcext/PdfConverter/.cache", extractFileDir(sysExecName()));
    createDir("", cache_root);

    cache_path_ := format("%s/%s/", cache_root, leaf);
    createDir("", cache_path_);
end;
// Resets the section state, deserializes the main document part, converts it
// to points, and distributes the body elements into section modules.
function TSDocxToPdf.InitSectModule();
begin
    sect_module_array_ := array();
    current_sect_module_ := nil;
    current_sect_pr_adapter_ := nil;

    main_doc := docx_components_module_.Document;
    main_doc.Deserialize();
    main_doc.ConvertToPoint();

    {self.}AllocateElementsToSectModule();
end;
// Splits the document body into section modules. Elements are appended to the
// current module until a section boundary is met: either a paragraph carrying
// a sectPr in its paragraph properties, or the body-level sectPr when it is
// the last body element. Each finished module is appended to sect_module_array_.
function TSDocxToPdf.AllocateElementsToSectModule();
begin
    elements := docx_components_module_.Document.Body.Elements();
    module := new DTPModules.SectModule();
    // Finalises a module with its section properties, filling in defaults.
    fp := function(module, sect);
    begin
        // Keep an explicitly specified orientation and fall back to "portrait"
        // only when none is given. The previous ternary replaced every explicit
        // value (including "landscape") with "portrait" and defaulted a missing
        // one to "landscape".
        sect.PgSz.Orient := sect.PgSz.Orient ?: "portrait";
        sect.Type.Val := sect.Type.Val ?: "nextPage";
        module.SectPr := sect;
        module.BaseSize := round(sect.DocGrid.LinePitch * 0.75);
    end
    for i:=0 to length(elements)-1 do
    begin
        element := elements[i];
        module.AddElement(element);
        if element.LocalName = "p" and element.PPr.SectPr then
        begin
            // A paragraph holding a sectPr closes the current section.
            ##fp(module, element.PPr.SectPr);
            sect_module_array_[length(sect_module_array_)] := module;
            module := new DTPModules.SectModule();
        end
        else if element.LocalName = "sectPr" and i = length(elements)-1 then
        begin
            // The trailing body-level sectPr closes the last section.
            ##fp(module, element);
            sect_module_array_[length(sect_module_array_)] := module;
        end
    end
    // NOTE(review): if the body does not end with a sectPr element, the elements
    // collected after the last section break are never added to sect_module_array_.
end;
// Opens a new page for the current section, assigns its page number, renders
// its header and footer, and computes the body text origin and bounds.
//   flag - true when the page is the first page of a section (as passed by
//          Transform); false (the default) for any other new page.
// Returns the page that subsequent content should be placed on.
function TSDocxToPdf.AddPage(flag: boolean = false): Page;
begin
    // A continuous section starts on the page that is already open, so no page
    // is created at its section start. This shortcut must only apply to the
    // section start (flag = true): applying it to every call, as before, meant
    // a continuous section could never receive a new page. The open page is
    // returned so the declared Page result is honoured.
    if flag and current_sect_module_.SectPr.Type.Val = "continuous" and page_manager_module_.Count <> 0 then return current_page_;
    page := page_manager_module_.NewPage();
    page.SectPr := current_sect_module_.SectPr;
    // Page number: an explicit start value only applies to the first page of the
    // section; otherwise numbering continues from the previous page, and the
    // very first page of the document is 1. Previously a section without a
    // start value restarted at 1.
    if flag and not ifnil(current_sect_module_.SectPr.PgNumType.Start) then
        page.Number := current_sect_module_.SectPr.PgNumType.Start;
    else if page_manager_module_.Count() >= 2 then
        page.Number := page_manager_module_[page_manager_module_.Count() - 2].Number + 1;
    else
        page.Number := 1;
    current_page_ := page;
    // Header / footer selection.
    // NOTE(review): "first" is only chosen when Index = 0, i.e. apparently only
    // for the first page of the document, not the first page of each section - confirm.
    // NOTE(review): the even/odd choice uses Index; if Index is 0-based the first
    // page would be treated as "even" - confirm whether Number should be used.
    if current_sect_module_.SectPr.TitlePg and current_page_.Index = 0 then
        type_name := "first";
    else if not even_and_odd_flag_ then
        type_name := "default";
    else if not odd(current_page_.Index) then
        type_name := "even";
    else
        type_name := "default";
    {self.}SetHdr(type_name);
    {self.}SetFtr(type_name);
    // With header and footer placed, compute the body text coordinates.
    page.TextPoint.X := page.SectPr.PgMar.Left;
    page.TextPoint.Y := page.SectPr.PgSz.H - max(page.SectPr.PgMar.Top, page.SectPr.PgMar.Header);
    // The body starts at the top margin or at the header's end point, whichever Y is smaller.
    page.TextPoint.Y := min(page.TextPoint.Y, page.HdrPoint.Y);
    page.UpperBound := page.TextPoint.Y;
    page.LowerBound := max(page.SectPr.PgMar.Bottom, page.FtrPoint.Y);
    // SetHdr/SetFtr point xml_file_ at the header/footer part; switch back to the main part.
    xml_file_ := "document.xml";
    return current_page_;
end;
// Places the footer of the given type ("first", "even" or "default") on the
// current page and records in FtrPoint where the footer starts.
// When the section has no footer reference of that type, FtrPoint keeps the
// default derived from the page margins.
function TSDocxToPdf.SetFtr(type: string);
begin
    // Footer is laid out from the bottom upwards, so its position needs adjusting.
    current_page_.FtrPoint.X := current_page_.SectPr.PgMar.Left;
    current_page_.FtrPoint.Y := current_page_.SectPr.PgMar.Footer;

    ftr_ref := current_sect_pr_adapter_.GetFooterReferenceByType(type);
    if not ifObj(ftr_ref) then return;

    // Resolve the referenced footer part through the document relationships.
    doc_rels := docx_components_module_.GetDocumentRelsAdapter();
    relationship := doc_rels.GetRelationshipById(ftr_ref.Id);
    footer := docx_components_module_.GetFtr(relationship.Target);
    xml_file_ := relationship.Target;

    content_width := current_page_.SectPr.PgSz.W - current_page_.SectPr.PgMar.Right - current_page_.SectPr.PgMar.Left;
    footer_range := new DTPAdvancedRanges.FtrRange(self, footer);
    footer_range.Width := content_width;
    footer_range.Parent := self;
    footer_range.LowerBound := 0;
    can_render := footer_range.Calc();

    // Shift the range up by its own height above the footer margin.
    top_y := current_page_.SectPr.PgMar.Footer + footer_range.DynamicHeight;
    footer_range.Offset(current_page_.SectPr.PgMar.Left, top_y, current_page_);

    if can_render then
    begin
        footer_range.Do();
    end
    else begin
        // Not renderable yet: defer until ProcessRealtimeRange.
        realtime_range_array_[length(realtime_range_array_)] := footer_range;
    end

    // Start position after the offset = lower boundary of the body text.
    current_page_.FtrPoint.Y := footer_range.StartY;
end;
// Places the header of the given type ("first", "even" or "default") on the
// current page and records in HdrPoint where the header ends.
// When the section has no header reference of that type, HdrPoint keeps the
// default derived from the page size and header margin.
function TSDocxToPdf.SetHdr(type: string);
begin
    // Header is laid out from the top downwards, so no adjustment is needed.
    current_page_.HdrPoint.X := current_page_.SectPr.PgMar.Left;
    current_page_.HdrPoint.Y := current_page_.SectPr.PgSz.H - current_page_.SectPr.PgMar.Header;

    hdr_ref := current_sect_pr_adapter_.GetHeaderReferenceByType(type);
    if not ifObj(hdr_ref) then return;

    // Resolve the referenced header part through the document relationships.
    doc_rels := docx_components_module_.GetDocumentRelsAdapter();
    relationship := doc_rels.GetRelationshipById(hdr_ref.Id);
    header := docx_components_module_.GetHdr(relationship.Target);
    xml_file_ := relationship.Target;

    content_width := current_page_.SectPr.PgSz.W - current_page_.SectPr.PgMar.Right - current_page_.SectPr.PgMar.Left;
    header_range := new DTPAdvancedRanges.HdrRange(self, header);
    header_range.Width := content_width;
    header_range.Parent := self;
    header_range.LowerBound := 0;
    can_render := header_range.Calc();

    header_range.Offset(current_page_.SectPr.PgMar.Left, current_page_.SectPr.PgSz.H - current_page_.SectPr.PgMar.Header, current_page_);

    if can_render then
    begin
        header_range.Do();
    end
    else begin
        // Not renderable yet: defer until ProcessRealtimeRange.
        realtime_range_array_[length(realtime_range_array_)] := header_range;
    end

    current_page_.HdrPoint.Y := header_range.EndY;
end;
// Adjusts current_page_.TextPoint.Y between two consecutive ranges when one of
// them is a paragraph with an outline level and its auto-spacing flag is set,
// then remembers `range` as the previous range for the next call.
// Called by TransformP/TransformTbl after Calc() and before Offset().
function TSDocxToPdf.RangesSpacing(range: BasicRange);
begin
if last_range_ then
begin
// Case 1: the previous range is an outline-level paragraph and the new range is
// a table or a paragraph without an outline level.
if last_range_ is class(DTPAdvancedRanges.PRange) and not ifnil(last_range_.P.PPr.OutlineLvl.Val) and (range is class(DTPAdvancedRanges.TblRange) or ifnil(range.P.PPr.OutlineLvl.Val)) then
begin
if last_range_.P.PPr.Spacing.AfterAutospacing then
begin
// Take the last line of the previous paragraph.
// NOTE(review): an empty PLineRanges() result would index position -1 here - confirm it cannot be empty.
line_range := last_range_.PLineRanges();
line_range := line_range[length(line_range)-1];
y := line_range.EndY - line_range.TextMaxSize;
if y >= 0 then current_page_.TextPoint.Y := y;
end
end
// Case 2: the new range is an outline-level paragraph and the previous range is
// a table or a paragraph without an outline level.
if range is class(DTPAdvancedRanges.PRange) and not ifnil(range.P.PPr.OutlineLvl.Val) and (last_range_ is class(DTPAdvancedRanges.TblRange) or ifnil(last_range_.P.PPr.OutlineLvl.Val)) then
begin
if range.P.PPr.Spacing.BeforeAutospacing then
begin
// Uses the last line of the new paragraph for the text size.
line_range := range.PLineRanges();
line_range := line_range[length(line_range)-1];
// NOTE(review): last_range_.EndY may belong to an earlier page than current_page_ - confirm this is intended.
y := last_range_.EndY - line_range.TextMaxSize;
if y >= 0 then current_page_.TextPoint.Y := y;
end
end
end
last_range_ := range;
end;
// Lays out one paragraph at the current text position and advances the text
// position to the paragraph's end. Paragraphs whose Calc() result is false are
// queued in realtime_range_array_ instead of being rendered immediately.
function TSDocxToPdf.TransformP(p: P);
begin
    content_width := current_page_.SectPr.PgSz.W - current_page_.SectPr.PgMar.Right - current_page_.SectPr.PgMar.Left;

    p_range := new DTPAdvancedRanges.PRange(self, p);
    p_range.Width := content_width;
    p_range.Parent := self;
    p_range.LowerBound := current_page_.LowerBound;
    can_render := p_range.Calc();

    // Spacing relative to the previous range may move the text position first.
    {self.}RangesSpacing(p_range);
    p_range.Offset(current_page_.TextPoint.X, current_page_.TextPoint.Y, current_page_);

    if can_render then
    begin
        p_range.Do();
    end
    else begin
        realtime_range_array_[length(realtime_range_array_)] := p_range;
    end

    current_page_.TextPoint.Y := p_range.EndY;
end;
// Lays out one table at the current text position, renders it immediately,
// and advances the text position to the table's end.
function TSDocxToPdf.TransformTbl(tbl: Tbl);
begin
    content_width := current_page_.SectPr.PgSz.W - current_page_.SectPr.PgMar.Right - current_page_.SectPr.PgMar.Left;

    tbl_range := new DTPAdvancedRanges.TblRange(self, tbl);
    tbl_range.Width := content_width;
    tbl_range.Parent := self;
    tbl_range.LowerBound := current_page_.LowerBound;
    // The result of Calc() is not used for tables: they are always rendered right away.
    tbl_range.Calc();

    {self.}RangesSpacing(tbl_range);
    tbl_range.Offset(current_page_.TextPoint.X, current_page_.TextPoint.Y, current_page_);
    tbl_range.Do();

    current_page_.TextPoint.Y := tbl_range.EndY;
end;
// Renders a structured document tag by laying out each paragraph returned by
// its content's Ps(), in order.
function TSDocxToPdf.TransformSdt(sdt: Sdt);
begin
    paragraphs := sdt.SdtContent.Ps();
    for _,paragraph in paragraphs do
    begin
        {self.}TransformP(paragraph);
    end
end;
// Finalises every deferred range: lets it process its realtime content and
// then renders it, in the order the ranges were queued.
function TSDocxToPdf.ProcessRealtimeRange();
begin
    for _,deferred in realtime_range_array_ do
    begin
        deferred.ProcessRealtimeArray();
        deferred.Do();
    end
end;
// Delegates to the table-of-contents module's UpdateDocxNumPages().
function TSDocxToPdf.UpdateDocxPageNumPages();
begin
toc_module_.UpdateDocxNumPages();
end;
// Saves the docx through the components module's Save() and returns its result.
function TSDocxToPdf.SaveDocxFile();overload;
begin
    save_result := docx_components_module_.Save();
    return save_result;
end;
// Saves the docx to (alias, file) through the components module's SaveAs()
// and returns its result.
function TSDocxToPdf.SaveDocxFile(alias: string; file: string);overload;
begin
    save_result := docx_components_module_.SaveAs(alias, file);
    return save_result;
end;
// Multi-column layout for a section. Currently disabled: the function returns
// on its first statement, so everything below is unreachable and the caller
// receives no result.
// The unreachable body builds one ColumnRange per column, distributes the
// section's elements over the columns (splitting paragraphs at column breaks),
// renders the columns, and writes the resulting Y position back into `point`.
function TSDocxToPdf.ClassifyCols(var point: Point; cols: Cols);
begin
return;
bk_page := current_page_;
columns := array();
x := point.X;
y := point.Y;
w := current_page_.SectPr.PgSz.W - current_page_.SectPr.PgMar.Right - current_page_.SectPr.PgMar.Left;
lb := current_page_.SectPr.PgMar.Bottom;
w_array := array();
ccols := cols.Cols();
// Build one column range per column.
for i:=0 to cols.Num-1 do
begin
rw := 0;
if cols.EqualWidth = "0" then
begin
// Unequal widths: take each column's own width and advance x past the previous column and its spacing.
rw := ccols[i].W;
if i > 0 then x += ccols[i-1].W + ccols[i-1].Space;
end
else begin
// NOTE(review): the divisor is hard-coded to 3 rather than cols.Num, and the
// inter-column spacing is not subtracted from w - confirm before enabling.
rw := w / 3;
x := point.X + i * rw + i * cols.Space;
end
// NOTE(review): ColumnRange is not qualified with DTPAdvancedRanges like the other range classes - confirm it resolves.
range := new ColumnRange(self, current_page_, docx_components_module_);
range.StartX := x;
range.StartY := y;
range.Width := rw;
range.LowerBound := lb;
columns[length(columns)] := range;
end
// Distribute the section's elements over the columns, starting with the first.
i := 0;
elements := current_sect_module_.Elements();
range := columns[0];
for _,element in elements do
begin
if element.LocalName = "p" then
begin
// Rebuild the paragraph child by child so it can be split at column breaks.
p := new P();
sub_elements := element.Elements();
for _,sub in sub_elements do
begin
p.AppendChild(sub);
if sub.LocalName = "r" and sub.Br.Type = "column" then
begin
// Column break: close the part collected so far and continue in the next column.
range.AddElement(p);
p := new P();
p.PPr.Copy(element.PPr);
// NOTE(review): no bounds check - more column breaks than columns would index past the end of `columns`.
range := columns[++i];
end
end
range.AddElement(p);
end
else begin
range.AddElement(element);
end
end
// Render every column and work out the Y position to continue from.
pg := nil;
max_y := nil;
for _,column in columns do
begin
column.Do();
page := column.GetLastPage();
if ifnil(pg) then pg := page;
// NOTE(review): the three lines below read `range.EndY` (the column last
// assigned above), not `column.EndY`, and `pg` is never advanced when a later
// page is found - looks unintended; confirm.
if ifnil(max_y) then max_y := range.EndY;
if page.Index > pg.Index then max_y := range.EndY;
else if page.Index = pg.Index and max_y > range.EndY then max_y := range.EndY;
end
current_page_ := bk_page;
point.Y := max_y;
end;