PdfConverter/TSDocxToPdf.tsf

476 lines
16 KiB
Plaintext

// TSDocxToPdf: converts a DOCX package into a PDF document.
// Create() opens the DOCX and builds the helper modules, Transform() walks the
// sections and lays out their elements, SaveToFile() delegates to PdfFile.SaveToFile.
type TSDocxToPdf = class
uses TSPdfEnumerations, DocxML, DocxMLAdapter, DocxMLUnitDecorator, DTPModules, DTPUtils, DTPAdvancedRanges;
public
// Constructor / destructor. Destroy removes the cache directory created during Create.
function Create(alias: string; file: string);
function Destroy();
// Saves the generated PDF; returns whatever PdfFile.SaveToFile returns.
function SaveToFile(alias: string; file: string): integer;
// Lays out every section of the document into the PDF.
function Transform();
// Maps an image path to a file of the same name inside the cache directory.
function GetCachePath(image_path: string): string;
// Name of the XML part currently being processed (document, header or footer).
function GetCurrentXmlFile(): string;
// Returns the note module, creating it on first use.
function GetCurrentNoteModule(): NoteModule;
// Starts a new page for the current section; flag = true restarts page numbering.
function AddPage(flag: boolean): Page;
// Renders the ranges that were deferred until the total page count is known.
function ProcessNumpages();
// docx page-number extension
function UpdateDocxPageNumPages();
function SaveDocxFile();overload;
function SaveDocxFile(alias: string; file: string);overload;
property PdfFile read pdf_;
property Font read font_module_;
property PageManager read page_manager_module_;
property Toc read toc_module_;
property Note read note_module_;
private
function InitDocxComponents(alias: string; file: string);
function InitCachePath(file: string);
function InitPdfEncoder();
function InitSectModule();
function AllocateElementsToSectModule();
function ClassifyCols(var point: Point; cols: Cols);
function SetHdr(type: string);
function SetFtr(type: string);
function TransformP(paragraph: P; x: real; y: real; w: real; lb: real): ParagraphRange;
function TransformTbl(table: Tbl; x: real; y: real; w: real; lb: real): TableRange;
function TransformSdt(sdt: Sdt; x: real; y: real; w: real; lb: real): array of ParagraphRange;
private
pdf_: PdfFile;
docx_components_module_: DocxComponentsModule; // TSDocxComponentsModule
cache_path_: string; // temporary directory used to store temporary files
font_module_: FontModule; // font module
sect_module_array_: array of SectModule; // array of page-layout (section) modules
current_sect_module_: SectModule;
current_sect_pr_adapter_: SectPrAdapter;
page_manager_module_: PageManagerModule;
current_page_: Page;
toc_module_: TocModule; // table-of-contents module
note_module_: NoteModule; // footnotes / endnotes
range_page_number_array_: tableArray; // paragraph ranges whose Calc() returned false; rendered by ProcessNumpages
xml_file_: string; // XML part currently being processed
even_and_odd_flag_: boolean; // set from Settings.EvenAndOddHeaders in Create
// write-back to docx
docx_page_arr_: tableArray;
update_docx_pages_: boolean;
end;
// Constructor: creates the PDF document, opens the DOCX package and prepares
// every helper module used by Transform().
//   alias, file - forwarded to DocxComponentsModule.Open (via InitDocxComponents);
//                 file is also used to name the cache directory (InitCachePath).
// Raises "Open file error." (from InitDocxComponents) when the DOCX cannot be opened.
function TSDocxToPdf.Create(alias: string; file: string);
begin
    pdf_ := new PdfFile();
    pdf_.SetCompressionMode(TSPdfEnumerations.COMP_ALL);
    font_module_ := new DTPModules.FontModule(pdf_);
    {self.}InitPdfEncoder();
    {self.}InitDocxComponents(alias, file);
    {self.}InitCachePath(file);
    {self.}InitSectModule();
    current_page_ := nil;
    page_manager_module_ := new DTPModules.PageManagerModule(pdf_);
    toc_module_ := new DTPModules.TocModule();
    range_page_number_array_ := array();
    xml_file_ := "document.xml";
    // Read the "different odd and even pages" switch from the document settings.
    settings := docx_components_module_.Settings;
    settings.XmlChildEvenAndOddHeaders.Deserialize();
    even_and_odd_flag_ := settings.EvenAndOddHeaders ? true : false;
    // The original statement here was a bare "new ;" with no class name.
    // GetCurrentNoteModule() creates the module on demand when this field is nil
    // (it needs current_sect_module_, which is still nil at this point), so the
    // field is explicitly reset instead.
    // NOTE(review): confirm no caller reads the Note property before
    // GetCurrentNoteModule() has been invoked.
    note_module_ := nil;
    // write-back to docx
    docx_page_arr_ := array();
    update_docx_pages_ := false;
end;
// Destructor: deletes the cache directory recorded in cache_path_
// (created by InitCachePath during construction).
function TSDocxToPdf.Destroy();
begin
    removeDir("", cache_path_);
end;
// Writes the generated PDF to disk.
//   alias, file - passed unchanged to PdfFile.SaveToFile.
// Returns the status value produced by PdfFile.SaveToFile.
function TSDocxToPdf.SaveToFile(alias: string; file: string): integer;
begin
    save_status := pdf_.SaveToFile(alias, file);
    return save_status;
end;
// Lays out the whole document: for every section module, switches the current
// section (starting a new page), then either distributes the content over
// columns (ClassifyCols) or renders the elements one after another.
// Finishes by rendering the ranges deferred until the page count is known.
function TSDocxToPdf.Transform();
begin
    for sect_idx,sect_module in sect_module_array_ do
    begin
        // Entering a different section: refresh the adapter and open its first page.
        if current_sect_module_ <> sect_module then
        begin
            current_sect_module_ := sect_module;
            current_sect_pr_adapter_ := new SectPrAdapter(current_sect_module_.SectPr.GetObject());
            {self.}AddPage(true);
            xml_file_ := "document.xml";
        end
        body_elements := sect_module.Elements();
        // Multi-column sections are handled by ClassifyCols.
        sect_cols := current_sect_module_.SectPr.Cols;
        if sect_cols.Num > 1 then
        begin
            columns := {self.}ClassifyCols(current_page_.TextPoint, sect_cols);
        end
        else begin
            // Text width and lower bound are taken from the page that is current
            // when the section starts.
            page_sect_pr := current_page_.SectPr;
            body_width := page_sect_pr.PgSz.W - page_sect_pr.PgMar.Right - page_sect_pr.PgMar.Left;
            lower_bound := max(current_page_.SectPr.PgMar.Bottom, current_page_.FtrPoint.Y);
            for elem_idx,element in body_elements do
            begin
                start_x := current_page_.TextPoint.X;
                start_y := current_page_.TextPoint.Y;
                local_name := element.LocalName;
                block_range := nil;
                if local_name = "p" then
                    block_range := {self.}TransformP(element, start_x, start_y, body_width, lower_bound);
                else if local_name = "tbl" then
                    block_range := {self.}TransformTbl(element, start_x, start_y, body_width, lower_bound);
                else if local_name = "sdt" then
                    // NOTE(review): the ranges returned for an sdt are not rendered here.
                    {self.}TransformSdt(element, start_x, start_y, body_width, lower_bound);
                // Advance the text cursor to the end of the block, then draw it.
                if ifObj(block_range) then
                begin
                    current_page_.TextPoint.Y := block_range.EndY;
                    block_range.Do();
                end
            end
        end
    end
    {self.}ProcessNumpages();
end;
// Returns the location inside the cache directory for the given image:
// cache_path_ followed by the file-name part of image_path.
function TSDocxToPdf.GetCachePath(image_path: string): string;
begin
    base_name := extractFileName(image_path);
    return cache_path_ + base_name;
end;
// Returns the name of the XML part currently being processed
// ("document.xml", or the header/footer target set by SetHdr/SetFtr).
function TSDocxToPdf.GetCurrentXmlFile(): string;
begin
    return xml_file_;
end;
// Enables the CNS fonts and CNS encodings on the PDF document.
// The UTF encodings are intentionally left disabled.
function TSDocxToPdf.InitPdfEncoder();
begin
    pdf_.UseCNSFonts();
    pdf_.UseCNSEncodings();
    // pdf_.UseUTFEncodings();
end;
// Creates the DOCX components module and opens the source document.
//   alias, file - passed to DocxComponentsModule.Open.
// Raises "Open file error." when Open reports an error.
function TSDocxToPdf.InitDocxComponents(alias: string; file: string);
begin
    docx_components_module_ := new DTPModules.DocxComponentsModule();
    [open_err, open_msg] := docx_components_module_.Open(alias, file, nil);
    if open_err then
    begin
        raise "Open file error.";
    end
end;
// Creates a per-conversion cache directory and stores its path (with a
// trailing "/") in cache_path_.
// Layout: <dir of executable>/funcext/PdfConverter/.cache/<file name>_<timestamp>/
function TSDocxToPdf.InitCachePath(file: string);
begin
    // Directory name: source file name plus a millisecond timestamp.
    time_stamp := formatDatetime("YYYYMMDDHHNNSSZZZ", now());
    run_dir_name := format("%s_%s", extractFileName(file), time_stamp);
    // Shared cache root next to the executable.
    exec_dir := extractFileDir(sysExecName());
    cache_root := format("%s/funcext/PdfConverter/.cache", exec_dir);
    createDir("", cache_root);
    cache_path_ := format("%s/%s/", cache_root, run_dir_name);
    createDir("", cache_path_);
end;
// Resets the section state, deserializes the main document part and splits
// its body elements into section modules.
function TSDocxToPdf.InitSectModule();
begin
    sect_module_array_ := array();
    current_sect_module_ := nil;
    current_sect_pr_adapter_ := nil;
    // The document must be deserialized before its body elements are read.
    docx_components_module_.Document.Deserialize();
    {self.}AllocateElementsToSectModule();
end;
// Splits the body elements of the document into section modules and appends
// them to sect_module_array_.
// A section ends either at a paragraph that carries a sectPr in its paragraph
// properties, or at the trailing body-level sectPr element.
function TSDocxToPdf.AllocateElementsToSectModule();
begin
    elements := docx_components_module_.Document.Body.Elements();
    module := new DTPModules.SectModule();
    // Wraps the section properties in the unit decorator, fills in defaults
    // for missing values and attaches the result to the module.
    fp := function(module, sect);
    begin
        sect := new SectPrUnitDecorator(sect);
        // Fix: keep an explicit orientation and only default a missing one.
        // The previous ternary replaced any explicit value with "portrait" and
        // turned a missing value into "landscape".
        sect.PgSz.Orient := sect.PgSz.Orient ?: "portrait";
        sect.Type.Val := sect.Type.Val ?: "nextPage";
        module.SectPr := sect;
    end
    for i:=0 to length(elements)-1 do
    begin
        element := elements[i];
        module.AddElement(element);
        if element.LocalName = "p" and ifObj(element.PPr.SectPr) then
        begin
            // Paragraph-level sectPr closes the current section; start a new one.
            ##fp(module, element.PPr.SectPr);
            sect_module_array_[length(sect_module_array_)] := module;
            module := new DTPModules.SectModule();
        end
        else if element.LocalName = "sectPr" and i = length(elements)-1 then
        begin
            // Body-level sectPr (last element) closes the final section.
            ##fp(module, element);
            sect_module_array_[length(sect_module_array_)] := module;
        end
    end
    // println("sect_module_array_ = {}", sect_module_array_);
end;
// Starts a new page for the current section, positions the header/footer/text
// cursors, renders the header and footer, and makes the page current.
//   flag - true when the page starts a section: the page number is taken from
//          PgNumType.Start (1 when absent); otherwise it continues from the
//          previous page.
// Returns the new current page, or nothing when the section is "continuous"
// and at least one page already exists (no page is added in that case).
function TSDocxToPdf.AddPage(flag: boolean = false): Page;
begin
    if current_sect_module_.SectPr.Type.Val = "continuous" and page_manager_module_.Count <> 0 then return;
    page := page_manager_module_.NewPage();
    page.SectPr := current_sect_module_.SectPr;
    // Count() already includes the new page, so Count() - 2 is the previous page.
    page.Number := flag ? ifnil(current_sect_module_.SectPr.PgNumType.Start) ? 1 : current_sect_module_.SectPr.PgNumType.Start : page_manager_module_[page_manager_module_.Count() - 2].Number + 1;
    page.BaseSize := round(page.SectPr.DocGrid.LinePitch * 0.75);
    // Coordinates for body text, header and footer.
    // Header: laid out top-down, no adjustment needed.
    page.HdrPoint.X := page.SectPr.PgMar.Left;
    page.HdrPoint.Y := page.SectPr.PgSz.H - page.SectPr.PgMar.Header;
    // Footer: laid out bottom-up, adjusted later by SetFtr.
    page.FtrPoint.X := page.SectPr.PgMar.Left;
    page.FtrPoint.Y := page.SectPr.PgSz.H;
    // Body text: recomputed after the header and footer have been rendered.
    page.TextPoint.X := page.SectPr.PgMar.Left;
    page.TextPoint.Y := page.SectPr.PgSz.H - max(page.SectPr.PgMar.Top, page.SectPr.PgMar.Header);
    current_page_ := page;
    // Header / footer variant selection.
    // NOTE(review): "first" is only chosen for the page with Index 0, i.e. not
    // for the first page of later sections - confirm this is intended.
    if current_sect_module_.SectPr.TitlePg and current_page_.Index = 0 then
        type_name := "first";
    else if not even_and_odd_flag_ then
        type_name := "default";
    // Fix: decide even/odd from the page number. The previous test used the
    // page Index, which the "first" check above treats as 0-based, so the
    // first (odd) page received the "even" header and footer.
    else if not odd(current_page_.Number) then
        type_name := "even";
    else
        type_name := "default";
    // SetHdr/SetFtr switch xml_file_ to the header/footer part; restore it afterwards.
    bk_file := xml_file_;
    {self.}SetHdr(type_name);
    {self.}SetFtr(type_name);
    xml_file_ := bk_file;
    return current_page_;
end;
// Renders the footer of the given type on the current page and moves
// FtrPoint.Y to the top of the rendered footer.
//   type - footer variant passed to GetFooterReferenceByType
//          ("first", "even" or "default" as chosen by AddPage).
// Only paragraph elements of the footer part are laid out.
function TSDocxToPdf.SetFtr(type: string);
begin
    footer_reference := current_sect_pr_adapter_.GetFooterReferenceByType(type);
    ftr_range := array();
    if ifObj(footer_reference) then
    begin
        rels_adapter := docx_components_module_.GetDocumentRelsAdapter();
        rel := rels_adapter.GetRelationshipById(footer_reference.Id);
        w := current_page_.SectPr.PgSz.W - current_page_.SectPr.PgMar.Right - current_page_.SectPr.PgMar.Left;
        lb := 0;
        obj := docx_components_module_.GetFtr(rel.Target);
        xml_file_ := rel.Target;
        elements := obj.Elements();
        // First pass: measure the paragraphs starting from FtrPoint; nothing is drawn yet.
        for _,element in elements do
        begin
            x := current_page_.FtrPoint.X;
            y := current_page_.FtrPoint.Y;
            range := nil;
            if element.LocalName = "p" then range := {self.}TransformP(element, x, y, w, lb);
            if ifObj(range) then
            begin
                current_page_.FtrPoint.Y := range.EndY;
                ftr_range[length(ftr_range)] := range;
            end
        end
    end
    // Fix: without a footer reference (or without any laid-out paragraph) the
    // array is empty and indexing its last element would dereference a missing
    // value. The footer then has zero height, so the footer boundary is the
    // bottom margin - the same value the code below yields for height = 0.
    if length(ftr_range) = 0 then
    begin
        current_page_.FtrPoint.Y := current_page_.SectPr.PgMar.Bottom;
        return;
    end
    // Second pass: shift every range by the distance between the end of the
    // last range and the bottom margin, draw it and accumulate the height.
    last_range := ftr_range[length(ftr_range)-1];
    y_offset := last_range.EndY - current_page_.SectPr.PgMar.Bottom;
    height := 0;
    for _,range in ftr_range do
    begin
        range.Offset(0, y_offset);
        range.Do();
        height += range.DynamicHeight;
    end
    current_page_.FtrPoint.Y := current_page_.SectPr.PgMar.Bottom + height;
end;
// Renders the header of the given type on the current page.
//   type - header variant passed to GetHeaderReferenceByType.
// Only paragraph elements are rendered. After each paragraph the header cursor
// moves to its end and the body text start is pushed below it if necessary.
function TSDocxToPdf.SetHdr(type: string);
begin
    header_reference := current_sect_pr_adapter_.GetHeaderReferenceByType(type);
    // Nothing to do when the section has no header of this type.
    if not ifObj(header_reference) then return;
    rels_adapter := docx_components_module_.GetDocumentRelsAdapter();
    rel := rels_adapter.GetRelationshipById(header_reference.Id);
    page_sect_pr := current_page_.SectPr;
    hdr_width := page_sect_pr.PgSz.W - page_sect_pr.PgMar.Right - page_sect_pr.PgMar.Left;
    hdr_lower_bound := 0;
    hdr_part := docx_components_module_.GetHdr(rel.Target);
    xml_file_ := rel.Target;
    hdr_elements := hdr_part.Elements();
    for _,element in hdr_elements do
    begin
        if element.LocalName = "p" then
        begin
            hdr_range := {self.}TransformP(element, current_page_.HdrPoint.X, current_page_.HdrPoint.Y, hdr_width, hdr_lower_bound);
            if ifObj(hdr_range) then
            begin
                current_page_.HdrPoint.Y := hdr_range.EndY;
                hdr_range.Do();
                // Y decreases down the page, so min() keeps the lower of the two positions.
                current_page_.TextPoint.Y := min(current_page_.TextPoint.Y, hdr_range.EndY);
            end
        end
    end
end;
// Builds and measures a ParagraphRange for one paragraph.
//   paragraph - paragraph element to lay out
//   x, y      - start position
//   w         - available width
//   lb        - lower bound of the layout area
// Returns the range when Calc() succeeds. Otherwise the range is queued in
// range_page_number_array_ (rendered later by ProcessNumpages) and nil is returned.
function TSDocxToPdf.TransformP(paragraph: P; x: real; y: real; w: real; lb: real): ParagraphRange;
begin
    para_range := new ParagraphRange(self, current_page_, docx_components_module_, paragraph);
    para_range.StartX := x;
    para_range.StartY := y;
    para_range.Width := w;
    para_range.LowerBound := lb;
    para_range.Parent := self;
    if para_range.Calc() then return para_range;
    // Calc() reported false: defer the range until the page count is known.
    range_page_number_array_[length(range_page_number_array_)] := para_range;
    return nil;
end;
// Builds and measures a TableRange for one table.
//   table - table element to lay out
//   x, y  - start position
//   w     - available width
//   lb    - lower bound of the layout area
// Returns the range after Calc() has run; the result of Calc() is not checked.
function TSDocxToPdf.TransformTbl(table: Tbl; x: real; y: real; w: real; lb: real): TableRange;
begin
    table_range := new TableRange(self, current_page_, docx_components_module_, table);
    table_range.StartX := x;
    table_range.StartY := y;
    table_range.Width := w;
    table_range.LowerBound := lb;
    table_range.Parent := self;
    table_range.Calc();
    return table_range;
end;
// Measures every paragraph inside a structured document tag.
//   sdt         - the sdt element; its paragraphs come from SdtContent.Ps()
//   x, y, w, lb - passed unchanged to TransformP for each paragraph
// Returns the array of ranges for which TransformP returned an object.
// NOTE(review): every paragraph is given the same start position (x, y).
function TSDocxToPdf.TransformSdt(sdt: Sdt; x: real; y: real; w: real; lb: real): array of ParagraphRange;
begin
    sdt_paragraphs := sdt.SdtContent.Ps();
    collected := array();
    for _,para in sdt_paragraphs do
    begin
        para_range := {self.}TransformP(para, x, y, w, lb);
        if ifObj(para_range) then
        begin
            collected[length(collected)] := para_range;
        end
    end
    return collected;
end;
// Renders the paragraph ranges that were deferred by TransformP.
// The number of the last page is handed to each range as the total page
// count, then the range is converted to lines, offset and drawn.
function TSDocxToPdf.ProcessNumpages();
begin
    last_page := page_manager_module_[page_manager_module_.Count() - 1];
    total_pages := last_page.Number;
    for _,pending in range_page_number_array_ do
    begin
        pending.SetNumPages(total_pages);
        pending.RangesToLines();
        // The offset is derived from the page that is current at this moment.
        pending.Offset(0, current_page_.SectPr.PgSz.H - current_page_.SectPr.PgMar.Bottom);
        pending.Do();
    end
end;
// Delegates to the table-of-contents module to update the page numbers
// in the DOCX (TocModule.UpdateDocxNumPages).
function TSDocxToPdf.UpdateDocxPageNumPages();
begin
    toc_module_.UpdateDocxNumPages();
end;
// Saves the DOCX package through DocxComponentsModule.Save and returns its result.
function TSDocxToPdf.SaveDocxFile();overload;
begin
    save_result := docx_components_module_.Save();
    return save_result;
end;
// Saves the DOCX package to another location.
//   alias, file - passed unchanged to DocxComponentsModule.SaveAs.
// Returns the result of SaveAs.
function TSDocxToPdf.SaveDocxFile(alias: string; file: string);overload;
begin
    save_result := docx_components_module_.SaveAs(alias, file);
    return save_result;
end;
// Returns the note module, creating it from the current section module on
// first use.
function TSDocxToPdf.GetCurrentNoteModule(): NoteModule;
begin
    if ifnil(note_module_) then
    begin
        note_module_ := new NoteModule(current_sect_module_);
    end
    return note_module_;
end;
// Lays out the elements of the current section in multiple columns.
//   point - text cursor (passed by reference); on return point.Y holds the
//           lowest end position reached by the columns on the last page used
//   cols  - column settings of the section (Num, EqualWidth, Space, Cols())
// Steps: create one ColumnRange per column, distribute the section elements
// over the columns (splitting paragraphs at column breaks), render every
// column and finally restore the page that was current on entry.
function TSDocxToPdf.ClassifyCols(var point: Point; cols: Cols);
begin
    bk_page := current_page_;
    columns := array();
    x := point.X;
    y := point.Y;
    w := current_page_.SectPr.PgSz.W - current_page_.SectPr.PgMar.Right - current_page_.SectPr.PgMar.Left;
    lb := current_page_.SectPr.PgMar.Bottom;
    ccols := cols.Cols();
    for i:=0 to cols.Num-1 do
    begin
        rw := 0;
        if cols.EqualWidth = "0" then
        begin
            // Individually sized columns: widths and gaps come from the col children.
            rw := ccols[i].W;
            if i > 0 then x += ccols[i-1].W + ccols[i-1].Space;
        end
        else begin
            // Fix: equal-width columns share the text width that remains after
            // the gaps between them. The previous code divided by a fixed 3
            // and ignored the gaps.
            rw := (w - (cols.Num - 1) * cols.Space) / cols.Num;
            x := point.X + i * rw + i * cols.Space;
        end
        range := new ColumnRange(self, current_page_, docx_components_module_);
        range.StartX := x;
        range.StartY := y;
        range.Width := rw;
        range.LowerBound := lb;
        columns[length(columns)] := range;
    end
    // Distribute the section elements over the columns.
    i := 0;
    elements := current_sect_module_.Elements();
    range := columns[0];
    for _,element in elements do
    begin
        if element.LocalName = "p" then
        begin
            // Rebuild the paragraph child by child so that it can be split at
            // a column break.
            p := new P();
            sub_elements := element.Elements();
            for _,sub in sub_elements do
            begin
                p.AppendChild(sub);
                if sub.LocalName = "r" and sub.Br.Type = "column" then
                begin
                    range.AddElement(p);
                    p := new P();
                    p.PPr.Copy(element.PPr);
                    // Fix: stay in the last column when there are more column
                    // breaks than columns instead of indexing past the array.
                    if i < length(columns) - 1 then i += 1;
                    range := columns[i];
                end
            end
            range.AddElement(p);
        end
        else begin
            range.AddElement(element);
        end
    end
    // Render the columns and find the lowest end position on the last page used.
    // Y decreases down the page, so the smaller EndY is the lower one.
    pg := nil;
    max_y := nil;
    for _,column in columns do
    begin
        column.Do();
        page := column.GetLastPage();
        if ifnil(pg) then pg := page;
        // Fix: read the end position of the column being inspected. The
        // previous code read it from the stale "range" variable left over
        // from the distribution loop above.
        if ifnil(max_y) then max_y := column.EndY;
        if page.Index > pg.Index then
        begin
            // Fix: remember the later page so that the following columns are
            // compared against it.
            pg := page;
            max_y := column.EndY;
        end
        else if page.Index = pg.Index and max_y > column.EndY then max_y := column.EndY;
    end
    // NOTE(review): the page current on entry is restored even when a column
    // ended on a later page - confirm this is intended.
    current_page_ := bk_page;
    point.Y := max_y;
end;