PdfConverter/TSDocxToPdf.tsf

597 lines
18 KiB
Plaintext

// Converter that reads a DOCX package, splits its body into sections and
// lays the content out page by page into a PdfFile.
type TSDocxToPdf = class
public
    // Lifecycle / output
    function Create(alias: string; file: string);
    function Destroy();
    function SaveToFile(alias: string; file: string): integer;
    function Transform();

    // Accessors used by the range/layout helpers that receive `self`
    function GetPdf(): PdfFile;
    function GetCurrentSectWare(): TSSectWare;
    function GetCurrentTextPoint(): Point;
    function GetCurrentHdrPoint(): Point;
    function GetCurrentFtrPoint(): Point;
    function GetCurrentNoteWare(): TSNoteWare;
    function GetCachePath(image_path: string): string;
    function GetNextPage(page: TSPage): TSPage;
    function GetCurrentXmlFile(): string;

    // Page / table-of-contents bookkeeping
    function AddTSPage(flag: boolean): TSPage;
    function LinkToToc(anchor: string; page: TSPage; left: real; top: real);
    function AddToc(anchor: string; toc: TSToc);
    function SetHeaderAndFooter();
    function ProcessNumpages();
    function CalculateTextCoordinates(): array of real;
    function GetSymbol(symbol: string);
    // The first parameter is the TOC anchor used as key of docx_page_arr_
    // (declaration aligned with the implementation's signature).
    function AddDocxPage(anchor: string; r: R);
    function AddColIndex();
    function UpdateDocxPageNumpages();
    function SaveDocxFile();overload;
    function SaveDocxFile(alias: string; file: string);overload;
    property Font read ReadFont;
    function ReadFont();
private
    function InitDocxComponents(alias: string; file: string);
    function InitCachePath(file: string);
    function InitPdfEncoder();
    function InitSectWare();
    function InitSymbol();
    function AllocateElementsToSectWare();
    function ClassifyCols(var point: Point; cols: Cols);
    function SetHdr(type: string);
    function SetFtr(type: string);
    function TransformP(var point: Point; paragraph: P; w: real; lb: real);
    function TransformTbl(var point: Point; table: Tbl; w: real; lb: real);
    function TransformSdt(var point: Point; sdt: Sdt; w: real; lb: real);
    function PrintGrid(page: PdfPage; sect_ware: TSSectWare); // test
private
    pdf_: PdfFile;
    docx_components_ware_: TSDocxComponentsWare; // TSDocxComponentsWare
    cache_path_: string; // temporary directory used to store temporary files
    font_ware_: TSFontWare; // font component
    sect_ware_array_: array of TSSectWare; // array of page-layout (section) components
    current_sect_ware_: TSSectWare;
    current_sect_pr_adapter_: SectPrAdapter;
    symbol_: tableArray;
    current_page_: TSPage;
    page_array_: array of TSPage;
    toc_array_: tableArray;
    toc_unmacthed_array_: tableArray;
    range_page_number_array_: tableArray;
    text_point_: Point; // positioning coordinate of the body text
    hdr_point_: Point; // header coordinates
    ftr_point_: Point; // footer coordinates
    xml_file_: string;
    even_and_odd_flag_: boolean;
    note_ware_: TSNoteWare; // footnotes / endnotes
    // write-back to docx
    docx_page_arr_: tableArray;
    update_docx_pages_: boolean;
end;
// Simple mutable 2-D coordinate holder used for the text/header/footer cursors.
type Point = class
// Constructor: both coordinates start at 0.
function Create();
begin
{self.}X := 0;
{self.}Y := 0;
end
X: real;
Y: real;
end;
// Constructor.
// Creates the PDF target and font component, opens the DOCX identified by
// alias/file, prepares the cache directory, the section list and the symbol
// table, then resets all per-conversion state.
//   alias, file: forwarded to TSDocxComponentsWare.Open (see InitDocxComponents).
function TSDocxToPdf.Create(alias: string; file: string);
begin
pdf_ := new PdfFile();
font_ware_ := new TSFontWare(pdf_);
{self.}InitPdfEncoder();
{self.}InitDocxComponents(alias, file);
{self.}InitCachePath(file);
{self.}InitSectWare();
{self.}InitSymbol();
current_page_ := nil;
page_array_ := array();
toc_array_ := array();
toc_unmacthed_array_ := array();
range_page_number_array_ := array();
text_point_ := new Point();
hdr_point_ := new Point();
ftr_point_ := new Point();
xml_file_ := "document.xml";
// Read the evenAndOddHeaders setting once; it drives header/footer selection in AddTSPage.
settings := docx_components_ware_.Settings;
settings.XmlChildEvenAndOddHeaders.Deserialize();
even_and_odd_flag_ := settings.EvenAndOddHeaders ? true : false;
// Created lazily by GetCurrentNoteWare.
note_ware_ := nil;
// write-back to docx
docx_page_arr_ := array();
update_docx_pages_ := false;
end;
// Destructor: removes the per-conversion cache directory stored in cache_path_.
function TSDocxToPdf.Destroy();
begin
removeDir("", cache_path_);
end;
// Writes the generated PDF by delegating to PdfFile.SaveToFile.
// Returns whatever pdf_.SaveToFile(alias, file) returns.
function TSDocxToPdf.SaveToFile(alias: string; file: string): integer;
begin
return pdf_.SaveToFile(alias, file);
end;
// Runs the conversion: walks every section, opens its first page, renders its
// elements (multi-column sections via ClassifyCols, otherwise paragraph /
// table / sdt one by one) and finally resolves the deferred page-count ranges.
function TSDocxToPdf.Transform();
begin
    for _,sect_ware in sect_ware_array_ do
    begin
        // Switching to another section: refresh the adapter and open its page.
        if current_sect_ware_ <> sect_ware then
        begin
            current_sect_ware_ := sect_ware;
            current_sect_pr_adapter_ := new SectPrAdapter(current_sect_ware_.SectPr.GetObject());
            {self.}AddTSPage(true);
            xml_file_ := "document.xml";
        end
        elements := sect_ware.Elements();
        // Column layout
        cols := current_sect_ware_.SectPr.Cols;
        if cols.Num > 1 then
        begin
            {self.}ClassifyCols(text_point_, cols);
        end
        else begin
            sect_pr := current_sect_ware_.SectPr;
            // Usable width between the left and right margins; lower bound is the bottom margin.
            w := sect_pr.PgSz.W - sect_pr.PgMar.Right - sect_pr.PgMar.Left;
            lb := sect_pr.PgMar.Bottom;
            for _,element in elements do
            begin
                local_name := element.LocalName;
                if local_name = "p" then {self.}TransformP(text_point_, element, w, lb);
                else if local_name = "tbl" then {self.}TransformTbl(text_point_, element, w, lb);
                else if local_name = "sdt" then {self.}TransformSdt(text_point_, element, w, lb);
            end
        end
    end
    {self.}ProcessNumpages();
end;
// Returns the body-text cursor (text_point_).
function TSDocxToPdf.GetCurrentTextPoint(): Point;
begin
return text_point_;
end;
// Returns the header cursor (hdr_point_).
function TSDocxToPdf.GetCurrentHdrPoint(): Point;
begin
return hdr_point_;
end;
// Returns the footer cursor (ftr_point_).
function TSDocxToPdf.GetCurrentFtrPoint(): Point;
begin
return ftr_point_;
end;
// Returns the section component currently being rendered (nil before Transform starts).
function TSDocxToPdf.GetCurrentSectWare(): TSSectWare;
begin
return current_sect_ware_;
end;
// Maps image_path to a file inside the cache directory: only the file name of
// image_path is kept and appended to cache_path_ (which ends with "/").
function TSDocxToPdf.GetCachePath(image_path: string): string;
begin
return cache_path_ + extractFileName(image_path);
end;
// Returns the name of the XML part currently being rendered: "document.xml",
// or the header/footer relationship target while SetHdr/SetFtr are running.
function TSDocxToPdf.GetCurrentXmlFile(): string;
begin
return xml_file_;
end;
// Returns the PdfFile being written.
function TSDocxToPdf.GetPdf(): PdfFile;
begin
return pdf_;
end;
// Getter behind the `Font` property: returns the font component (font_ware_).
function TSDocxToPdf.ReadFont();
begin
return font_ware_;
end;
// Enables the CNS fonts and CNS encodings on the PDF object.
// The UTF encodings call is intentionally left disabled.
function TSDocxToPdf.InitPdfEncoder();
begin
pdf_.UseCNSFonts();
pdf_.UseCNSEncodings();
// pdf_.UseUTFEncodings();
end;
// Opens the DOCX package identified by alias/file into docx_components_ware_.
// Raises "Open file error." when Open reports an error.
// NOTE(review): the `msg` returned by Open is discarded, so the raised error
// carries no detail about why opening failed.
function TSDocxToPdf.InitDocxComponents(alias: string; file: string);
begin
namespace "DOCX";
docx_components_ware_ := new TSDocxComponentsWare();
[err, msg] := docx_components_ware_.Open(alias, file, nil);
if err then raise "Open file error.";
end;
// Creates a per-conversion cache directory
//   <exec dir>/funcext/PdfConverter/.cache/<file name>_<timestamp>/
// and stores its path (with trailing "/") in cache_path_.
function TSDocxToPdf.InitCachePath(file: string);
begin
    // File name plus a millisecond timestamp forms the per-run sub-directory name.
    stamp := formatDatetime("YYYYMMDDHHNNSSZZZ", now());
    unique_name := format("%s_%s", extractFileName(file), stamp);
    cache_root := format("%s/funcext/PdfConverter/.cache", extractFileDir(sysExecName()));
    createDir("", cache_root);
    cache_path_ := format("%s/%s/", cache_root, unique_name);
    createDir("", cache_path_);
end;
// Resets the section state, deserializes the main document part and
// distributes the body elements into section components.
function TSDocxToPdf.InitSectWare();
begin
sect_ware_array_ := array();
current_sect_ware_ := nil;
current_sect_pr_adapter_ := nil;
document := docx_components_ware_.Document;
document.Deserialize();
{self.}AllocateElementsToSectWare();
end;
// Builds the lookup table used by GetSymbol: each key is a symbol character
// taken from the document, each value the single character (via chr) that is
// emitted in its place.
// NOTE(review): most keys look like empty strings in a plain-text view;
// presumably they are private-use / symbol-font characters — verify the file
// encoding before editing these lines.
function TSDocxToPdf.InitSymbol();
begin
symbol_ := array(
"": chr(118),
"": chr(110),
"": chr(108),
"": chr(117),
"": chr(108),
"o": chr(109),
"": chr(110),
);
end;
// Splits the document body into sections.
// Every body element is appended to the section being built. A paragraph that
// carries a sectPr in its properties closes the current section; a trailing
// body-level sectPr closes the last one. Each closed section is finalised
// (decorated sectPr with defaults, ware.Do()) and pushed onto sect_ware_array_.
// NOTE(review): elements after the last paragraph-level sectPr are dropped
// when the body does not end with a sectPr element.
function TSDocxToPdf.AllocateElementsToSectWare();
begin
    elements := docx_components_ware_.Document.Body.Elements();
    ware := new TSSectWare();
    // Finalises one section: wraps the sectPr, fills in defaults and attaches it.
    fp := function(ware, sect);
    begin
        sect := new SectPrUnitDecorator(sect);
        // Keep an explicit orientation; only a missing one defaults to "portrait".
        // (The previous ternary replaced any explicit value with "portrait" and
        // turned a missing value into "landscape".)
        sect.PgSz.Orient := sect.PgSz.Orient ?: "portrait";
        sect.Type.Val := sect.Type.Val ?: "nextPage";
        ware.SectPr := sect;
        ware.Do();
    end
    for i:=0 to length(elements)-1 do
    begin
        element := elements[i];
        ware.AddElement(element);
        if element.LocalName = "p" and ifObj(element.PPr.SectPr) then
        begin
            // Paragraph-level sectPr: this paragraph is the last one of its section.
            ##fp(ware, element.PPr.SectPr);
            sect_ware_array_[length(sect_ware_array_)] := ware;
            ware := new TSSectWare();
        end
        else if element.LocalName = "sectPr" and i = length(elements)-1 then
        begin
            // Body-level sectPr: properties of the final section.
            ##fp(ware, element);
            sect_ware_array_[length(sect_ware_array_)] := ware;
        end
    end
    // println("sect_ware_array_ = {}", sect_ware_array_);
end;
// Appends a new page for the current section and makes it the current page.
//   flag: true when the page opens a section, so numbering restarts at the
//         section's PgNumType.Start (1 when absent); false continues from the
//         previous page's number.
// Returns the new TSPage, or nothing when the section type is "continuous" and
// at least one page already exists (no page is added in that case).
// Side effects: renders header and footer for the page, resets text_point_ to
// the start of the body area, optionally draws the debug grid.
function TSDocxToPdf.AddTSPage(flag: boolean = false): TSPage;
begin
    if current_sect_ware_.SectPr.Type.Val = "continuous" and length(page_array_) <> 0 then return;
    page := pdf_.AddPage();
    page.SetWidth(current_sect_ware_.SectPr.PgSz.W);
    page.SetHeight(current_sect_ware_.SectPr.PgSz.H);
    // println("W = {}, H = {}", current_sect_ware_.SectPr.PgSz.W, current_sect_ware_.SectPr.PgSz.H);
    len := length(page_array_);
    current_page_ := new TSPage();
    current_page_.Index := len;
    current_page_.PdfPage := page;
    // The very first page has no predecessor to continue from, so it is
    // numbered like a section start even when flag is false.
    if flag or (len = 0) then
    begin
        pg_start := current_sect_ware_.SectPr.PgNumType.Start;
        current_page_.Number := ifnil(pg_start) ? 1 : pg_start;
    end
    else begin
        current_page_.Number := page_array_[len-1].Number + 1;
    end
    // println("len = {}, Number = {}", len, current_page_.Number);
    page_array_[len] := current_page_;
    // Header and footer.
    // Index is 0-based, so an odd Index is the 2nd, 4th, ... page: those are
    // the even pages; all other pages use the "default" (odd) header/footer.
    if current_sect_ware_.SectPr.TitlePg and current_page_.Index = 0 then
        type_name := "first";
    else if not even_and_odd_flag_ then
        type_name := "default";
    else if odd(current_page_.Index) then
        type_name := "even";
    else
        type_name := "default";
    // SetHdr/SetFtr switch xml_file_ to the header/footer part; restore it afterwards.
    bk_file := xml_file_;
    {self.}SetHdr(type_name);
    {self.}SetFtr(type_name);
    xml_file_ := bk_file;
    // Body-text coordinates
    [x, y] := {self.}CalculateTextCoordinates();
    text_point_.X := x;
    text_point_.Y := y;
    if sysparams["_PDF_PAGE_GRID_DEBUG_"] then
        {self.}PrintGrid(page, current_sect_ware_);
    return current_page_;
end;
// Computes where body text starts on the current page.
// Returns array(x, y): x is the left margin; y is the lower of
// (page height - max(top margin, header distance)) and the Y reached by the header.
function TSDocxToPdf.CalculateTextCoordinates(): array of real;
begin
    sect_pr := current_sect_ware_.SectPr;
    top_offset := max(sect_pr.PgMar.Top, sect_pr.PgMar.Header);
    body_top := sect_pr.PgSz.H - top_offset;
    // A header that extends below the top margin pushes the body start down.
    return array(sect_pr.PgMar.Left, min(body_top, hdr_point_.Y));
end;
// Renders the footer of the given type ("first" / "even" / "default") on the
// current page. ftr_point_ is reset to (left margin, bottom margin) first; when
// the section has no footer reference of that type nothing else happens.
// Only paragraph elements of the footer part are rendered.
// Side effect: xml_file_ is switched to the footer part's target.
function TSDocxToPdf.SetFtr(type: string);
begin
    sect_pr := current_sect_ware_.SectPr;
    ftr_point_.X := sect_pr.PgMar.Left;
    ftr_point_.Y := sect_pr.PgMar.Bottom;
    reference := current_sect_pr_adapter_.GetFooterReferenceByType(type);
    if not ifObj(reference) then return;
    // Resolve the reference id to the footer part through the document relationships.
    rels_adapter := docx_components_ware_.GetDocumentRelsAdapter();
    relationship := rels_adapter.GetRelationshipById(reference.Id);
    content_width := sect_pr.PgSz.W - sect_pr.PgMar.Right - sect_pr.PgMar.Left;
    lower_bound := 0;
    footer := docx_components_ware_.GetFtr(relationship.Target);
    xml_file_ := relationship.Target;
    footer_elements := footer.Elements();
    for _,element in footer_elements do
    begin
        if element.LocalName = "p" then
        begin
            {self.}TransformP(ftr_point_, element, content_width, lower_bound);
        end
    end
end;
// Renders the header of the given type ("first" / "even" / "default") on the
// current page. hdr_point_ is reset to (left margin, page height - header
// distance) first; when the section has no header reference of that type
// nothing else happens. Only paragraph elements of the header part are rendered.
// Side effect: xml_file_ is switched to the header part's target.
function TSDocxToPdf.SetHdr(type: string);
begin
    sect_pr := current_sect_ware_.SectPr;
    hdr_point_.X := sect_pr.PgMar.Left;
    hdr_point_.Y := sect_pr.PgSz.H - sect_pr.PgMar.Header;
    reference := current_sect_pr_adapter_.GetHeaderReferenceByType(type);
    if not ifObj(reference) then return;
    // Resolve the reference id to the header part through the document relationships.
    rels_adapter := docx_components_ware_.GetDocumentRelsAdapter();
    relationship := rels_adapter.GetRelationshipById(reference.Id);
    content_width := sect_pr.PgSz.W - sect_pr.PgMar.Right - sect_pr.PgMar.Left;
    lower_bound := 0;
    header := docx_components_ware_.GetHdr(relationship.Target);
    xml_file_ := relationship.Target;
    header_elements := header.Elements();
    for _,element in header_elements do
    begin
        if element.LocalName = "p" then
        begin
            {self.}TransformP(hdr_point_, element, content_width, lower_bound);
        end
    end
end;
// Lays out one paragraph starting at `point` within width `w`, not going below `lb`.
// When Calc() reports success the paragraph is drawn immediately; otherwise the
// range is queued in range_page_number_array_ and drawn later by ProcessNumpages.
// point.Y is advanced to the paragraph's end position in both cases.
function TSDocxToPdf.TransformP(var point: Point; paragraph: P; w: real; lb: real);
begin
    p_range := new TSPdfParagraphRange(self, current_page_, docx_components_ware_, paragraph);
    p_range.StartX := point.X;
    p_range.StartY := point.Y;
    p_range.Width := w;
    p_range.LowerBound := lb;
    ready := p_range.Calc();
    if ready then
    begin
        p_range.Do();
    end
    else begin
        // Deferred: needs the total page count before it can be rendered.
        range_page_number_array_[length(range_page_number_array_)] := p_range;
    end
    point.Y := p_range.EndY;
end;
// Lays out and draws one table starting at `point` within width `w`, not going
// below `lb`, then advances point.Y to the table's end position.
function TSDocxToPdf.TransformTbl(var point: Point; table: Tbl; w: real; lb: real);
begin
    tbl_range := new TSPdfTableRange(self, current_page_, docx_components_ware_, table);
    tbl_range.StartX := point.X;
    tbl_range.StartY := point.Y;
    tbl_range.Width := w;
    tbl_range.LowerBound := lb;
    // Measure first, then draw.
    tbl_range.Calc();
    tbl_range.Do();
    point.Y := tbl_range.EndY;
end;
// Renders a structured document tag by rendering each paragraph returned by
// its content's Ps(), in order, through TransformP.
function TSDocxToPdf.TransformSdt(var point: Point; sdt: Sdt; w: real; lb: real);
begin
    paragraphs := sdt.SdtContent.Ps();
    for _,paragraph in paragraphs do
    begin
        {self.}TransformP(point, paragraph, w, lb);
    end
end;
// Renders the paragraphs that were deferred by TransformP because they need
// the total page count: each queued range receives the count, is turned into
// lines and drawn.
function TSDocxToPdf.ProcessNumpages();
begin
    // Total number of pages produced. The displayed Number of the last page is
    // not used: it differs from the page count whenever a section restarts
    // numbering, and indexing the last page fails when no page exists.
    nums := length(page_array_);
    for _,range in range_page_number_array_ do
    begin
        range.SetNumPages(nums);
        range.RangesToLines();
        range.Do();
    end
end;
// Returns the page stored right after `page` in page_array_ (looked up by page.Index + 1).
// NOTE(review): no bounds check — for the last page this reads past the end of page_array_.
function TSDocxToPdf.GetNextPage(page: TSPage);
begin
return page_array_[page.Index + 1];
end;
// Debug helper: draws the document grid lines and the margin rectangle of the
// section on `page`. Called from AddTSPage when the debug switch is set.
//   page: PDF page to draw on.
//   sect_ware: section whose margins and grid pitch are used.
function TSDocxToPdf.PrintGrid(page: PdfPage; sect_ware: TSSectWare);
begin
    // Horizontal grid lines, one every LinePitch, from the text start down to
    // the bottom margin. A missing or non-positive pitch would never reach the
    // bottom margin (endless loop), so the grid lines are skipped in that case.
    pitch := sect_ware.SectPr.DocGrid.LinePitch;
    draw_lines := false;
    if not ifnil(pitch) then
    begin
        if pitch > 0 then draw_lines := true;
    end
    i := 0;
    while draw_lines do
    begin
        y := text_point_.Y - i * pitch;
        if y <= sect_ware.SectPr.PgMar.Bottom then break;
        page.SetLineWidth(0.05);
        page.SetGrayStroke(0.75);
        page.MoveTo(sect_ware.SectPr.PgMar.Left, y);
        page.LineTo(sect_ware.SectPr.PgSz.W- sect_ware.SectPr.PgMar.Right, y);
        page.Stroke();
        i++;
    end
    // Corners of the margin rectangle: 1 = top-left, 2 = top-right,
    // 3 = bottom-left, 4 = bottom-right.
    x1 := sect_ware.SectPr.PgMar.Left;
    y1 := sect_ware.SectPr.PgSz.H - sect_ware.SectPr.PgMar.Top;
    x2 := sect_ware.SectPr.PgSz.W - sect_ware.SectPr.PgMar.Right;
    y2 := y1;
    x3 := x1;
    y3 := sect_ware.SectPr.PgMar.Bottom;
    x4 := x2;
    y4 := y3;
    page.SetLineWidth(0.05);
    page.SetGrayStroke(0.5);
    // top edge
    page.MoveTo(x1, y1);
    page.LineTo(x2, y2);
    page.Stroke();
    // left edge
    page.MoveTo(x1, y1);
    page.LineTo(x3, y3);
    page.Stroke();
    // right edge
    page.MoveTo(x2, y2);
    page.LineTo(x4, y4);
    page.Stroke();
    // bottom edge
    page.MoveTo(x3, y3);
    page.LineTo(x4, y4);
    page.Stroke();
end;
// Registers a table-of-contents entry for `anchor`. If a link target for that
// anchor was already reported through LinkToToc before any entry existed
// (stored in toc_unmacthed_array_), the link is resolved now and the pending
// record is cleared.
function TSDocxToPdf.AddToc(anchor: string; toc: TSToc);
begin
    if ifarray(toc_array_[anchor]) then
        toc_array_[anchor] union= array(toc);
    else
        toc_array_[anchor] := array(toc);
    // Pending record layout: (page, left, top) as stored by LinkToToc.
    pending := toc_unmacthed_array_[anchor];
    if pending then
    begin
        {self.}LinkToToc(anchor, pending[0], pending[1], pending[2]);
        toc_unmacthed_array_[anchor] := nil;
    end
end;
// Links the table-of-contents entries registered for `anchor` to the position
// (left, top) on `page`.
// When no entry is registered yet, the target is remembered in
// toc_unmacthed_array_ so that AddToc can resolve it later.
// When update_docx_pages_ is set, the page number is also written back into the
// docx run registered for the anchor via AddDocxPage.
function TSDocxToPdf.LinkToToc(anchor: string; page: TSPage; left: real; top: real);
begin
arr := toc_array_[anchor];
if ifnil(arr) then
begin
toc_unmacthed_array_[anchor] := array(page, left, top);
return;
end
dst := page.PdfPage.CreateDestination();
dst.SetXYZ(left, top, 1);
// NOTE(review): the loop has no begin/end, so only LinkAnnot runs per entry;
// AddPageNumber below runs once, after the loop, on the last `toc`. Confirm
// whether every entry was meant to receive the page number.
for _,toc in arr do
toc.LinkAnnot(dst);
toc.AddPageNumber(page);
if update_docx_pages_ then
begin
// NOTE(review): r is not checked; presumably AddDocxPage was called for this
// anchor beforehand — verify against callers.
r := docx_page_arr_[anchor];
r.T.Text := page.Number;
r.Serialize();
end
end;
// Looks `symbol` up in the table built by InitSymbol and returns the mapped character.
function TSDocxToPdf.GetSymbol(symbol: string);
begin
// println("symbol = {}, symbol_ = {}", symbol, symbol_);
return symbol_[symbol];
end;
// Remembers the docx run `r` under `anchor`; LinkToToc writes the resolved
// page number into that run when write-back is enabled. A later call with the
// same anchor replaces the stored run.
function TSDocxToPdf.AddDocxPage(anchor: string; r: R);
begin
docx_page_arr_[anchor] := r;
end;
// Enables write-back of resolved page numbers into the docx (see LinkToToc).
function TSDocxToPdf.UpdateDocxPageNumpages();
begin
update_docx_pages_ := true;
end;
// Saves the (possibly updated) docx by delegating to docx_components_ware_.Save().
function TSDocxToPdf.SaveDocxFile();overload;
begin
return docx_components_ware_.Save();
end;
// Saves the (possibly updated) docx to alias/file by delegating to docx_components_ware_.SaveAs().
function TSDocxToPdf.SaveDocxFile(alias: string; file: string);overload;
begin
return docx_components_ware_.SaveAs(alias, file);
end;
// Returns the footnote/endnote component, creating it on first use from the
// section that is current at that moment.
function TSDocxToPdf.GetCurrentNoteWare(): TSNoteWare;
begin
    if not ifnil(note_ware_) then return note_ware_;
    note_ware_ := new TSNoteWare(current_sect_ware_);
    return note_ware_;
end;
// Lays out a multi-column section.
//   point: start position of the section body; on return point.Y is the
//          lowest position reached by any column.
//   cols:  the section's column settings (Num, EqualWidth, Space, per-column W/Space).
// Steps: build one column range per column, distribute the section's elements
// over the columns (a run containing a column break moves on to the next
// column, splitting the paragraph), render every column, then report where
// the content ended.
function TSDocxToPdf.ClassifyCols(var point: Point; cols: Cols);
begin
    bk_page := current_page_;
    columns := array();
    x := point.X;
    y := point.Y;
    w := current_sect_ware_.SectPr.PgSz.W - current_sect_ware_.SectPr.PgMar.Right - current_sect_ware_.SectPr.PgMar.Left;
    lb := current_sect_ware_.SectPr.PgMar.Bottom;
    ccols := cols.Cols();
    for i:=0 to cols.Num-1 do
    begin
        rw := 0;
        if cols.EqualWidth = "0" then
        begin
            // Explicit widths: each column starts after the previous width + spacing.
            rw := ccols[i].W;
            if i > 0 then x += ccols[i-1].W + ccols[i-1].Space;
        end
        else begin
            // Equal widths: share what is left of the body width after the
            // (Num - 1) gaps among Num columns (was hard-coded to w / 3).
            rw := (w - (cols.Num - 1) * cols.Space) / cols.Num;
            x := point.X + i * rw + i * cols.Space;
        end
        range := new TSPdfColumnRange(self, current_page_, docx_components_ware_);
        range.StartX := x;
        range.StartY := y;
        range.Width := rw;
        range.LowerBound := lb;
        columns[length(columns)] := range;
    end
    // Distribute the elements over the columns.
    i := 0;
    last_col := length(columns) - 1;
    elements := current_sect_ware_.Elements();
    range := columns[0];
    for _,element in elements do
    begin
        if element.LocalName = "p" then
        begin
            p := new P();
            sub_elements := element.Elements();
            for _,sub in sub_elements do
            begin
                p.AppendChild(sub);
                if sub.LocalName = "r" and sub.Br.Type = "column" then
                begin
                    // Column break: close the fragment in this column and
                    // continue the paragraph (same properties) in the next one.
                    range.AddElement(p);
                    p := new P();
                    p.PPr.Copy(element.PPr);
                    // Stay in the last column when there are more breaks than
                    // columns instead of indexing past the end of `columns`.
                    if i < last_col then i++;
                    range := columns[i];
                end
            end
            range.AddElement(p);
        end
        else begin
            range.AddElement(element);
        end
    end
    // Render every column and track the furthest position reached: a later
    // page wins; on the same page the smaller Y (further down) wins.
    pg := nil;
    max_y := nil;
    for _,column in columns do
    begin
        column.Do();
        page := column.GetLastPage();
        if ifnil(pg) then
        begin
            pg := page;
            max_y := column.EndY;
        end
        else if page.Index > pg.Index then
        begin
            pg := page;
            max_y := column.EndY;
        end
        else if page.Index = pg.Index and max_y > column.EndY then
        begin
            max_y := column.EndY;
        end
    end
    // NOTE(review): the current page is restored to the page the section
    // started on even when a column ended on a later page (pg) — confirm this
    // is intended for content that follows a continuous section break.
    current_page_ := bk_page;
    point.Y := max_y;
end;