Mercurial > code > home > repos > sco-bot
comparison search/extract_pdf.py @ 8:f23b21bd0fce
apex search
author | drewp@bigasterisk.com |
---|---|
date | Sun, 07 Jul 2024 16:26:56 -0700 |
parents | 0e33c65f1904 |
children |
comparison legend: equal | deleted | inserted | replaced
7:53ae53f7d1b3 | 8:f23b21bd0fce |
---|---|
6 from pdfminer.converter import PDFPageAggregator | 6 from pdfminer.converter import PDFPageAggregator |
7 from pdfminer.layout import LAParams, LTTextBox | 7 from pdfminer.layout import LAParams, LTTextBox |
8 from pdfminer.pdfinterp import PDFPageInterpreter, PDFResourceManager | 8 from pdfminer.pdfinterp import PDFPageInterpreter, PDFResourceManager |
9 from pdfminer.pdfpage import PDFPage | 9 from pdfminer.pdfpage import PDFPage |
10 | 10 |
11 def files() -> Iterable[Path]: | |
12 for p in Path('data').glob('*.pdf'): | |
13 yield p | |
11 | 14 |
12 def phrasesFromFile(p: Path) -> Iterable: | 15 def phrasesFromFile(p: Path) -> Iterable: |
13 fp = open(p, 'rb') | 16 fp = open(p, 'rb') |
14 rsrcmgr = PDFResourceManager() | 17 rsrcmgr = PDFResourceManager() |
15 laparams = LAParams() | 18 laparams = LAParams() |