这段代码会把命名目的地打印两次:先按名称排序,再按其在 PDF 中自上而下的页面位置排序。一个大型示例 PDF 中包含命名目的地。
#!/usr/bin/env python
import sys
from pyPdf import PdfFileReader
def pdf_get_anchors(fh):
    """Collect the named destinations of a PDF with a sortable position.

    Args:
        fh: file object for the PDF, handed straight to PdfFileReader
            (the script opens it in binary mode).

    Returns:
        A 3-tuple ``(anchors, dest_count, page_count)``:

        * ``anchors`` is a list of ``(name, page_number, position)`` tuples.
          ``page_number`` is 1-based; ``position`` is a float equal to
          ``page_number`` plus the fraction of the page lying above the
          destination, so sorting on it orders anchors down the whole PDF.
        * ``dest_count`` is the number of named destinations the reader
          reports; a destination whose page matches none of the document's
          pages is counted here but does not appear in ``anchors``.
        * ``page_count`` is the number of pages in the document.
    """
    reader = PdfFileReader(fh)
    # Mapping name -> destination: unordered and without page numbers.
    destinations = reader.getNamedDestinations()
    page_count = reader.getNumPages()
    anchors = []
    for page_num in range(1, page_count + 1):
        page = reader.getPage(page_num - 1)
        # float() keeps the division below from truncating under Python 2
        # when the PDF stores the coordinates as integers.
        page_top = float(page['/MediaBox'][3])
        for name in destinations:
            dest = destinations[name]
            # A destination only references its page object, so the page
            # number is found by comparing against every page in turn.
            if dest.page.getObject() == page:
                try:
                    dest_top = float(dest.top)
                except TypeError:
                    # No usable top coordinate (presumably a fit-page style
                    # destination or a null /Top): sort it at the page top.
                    dest_top = page_top
                down_page = (page_top - dest_top) / page_top
                anchors.append((name, page_num, page_num + down_page))
    return anchors, len(destinations), page_count
def pdf_print_anchors(L):
    """Print one line per anchor: its position, then its name.

    Args:
        L: iterable of ``(name, page_number, position)`` tuples; only the
            name (index 0) and the position (index 2) are used.

    Each line holds the position rounded to two decimals, left-justified in
    an 8-character column, followed by a space and the anchor name.
    """
    for dest in L:
        name = dest[0]
        position = round(dest[2] * 100) / 100
        # One format operation with a tuple replaces the chained
        # ``"%-8.2f % %s" % position % name``, which only worked because
        # "% %" collapsed to a literal "%" on the first pass.  The single
        # parenthesised argument prints identically on Python 2 and 3.
        print("%-8.2f %s" % (position, name))
# Script body: list the named destinations of the PDF given as the first
# command-line argument, once sorted by name and once by position.
HeaderLine="\n Page Name\n"

# Everything needed is extracted before the call returns, so the file is
# closed right away instead of being left open for the rest of the run.
with open(sys.argv[1],'rb') as pdf_file:
    L, NumDests, NumPages = pdf_get_anchors(pdf_file)

print(HeaderLine)
L.sort(key=lambda dest: dest[0])  # name order
pdf_print_anchors(L)

print(HeaderLine)
L.sort(key=lambda dest: dest[2])  # order down the pdf
pdf_print_anchors(L)

print(HeaderLine)
# Joined with single spaces to reproduce the output of the original
# comma-separated print statement on both Python 2 and Python 3.
print(" ".join(["Number of NamedDestinations: ", str(NumDests), "NumPages: ", str(NumPages)]))