changeset 11:6622bacb0b84
first pass at reorg
| author | drewp@bigasterisk.com |
| --- | --- |
| date | Thu, 11 Jul 2024 18:15:44 -0700 |
| parents | 13438795d896 |
| children | 7f36497bfac3 |
| files | Dockerfile.server Dockerfile.web env flow/build_index.py flow/download.py flow/env flow/local_types.py flow/pyproject.toml flow/schema.py flow/search_index.py k8s/deploy.yaml scobot/index/access.py scobot/index/build_index_flow.py scobot/index/download_tasks.py scobot/index/schema.py scobot/local_types.py scobot/service/query.py search/Dockerfile search/doc.py search/extract_agenda.py search/extract_all.py search/extract_pdf.py search/meeting_docs.py search/query.py search/search_apex.py search/search_apex_rebuild.py skaffold.yaml web/Dockerfile |
| diffstat | 27 files changed, 279 insertions(+), 428 deletions(-) |
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/Dockerfile.server  Thu Jul 11 18:15:44 2024 -0700
@@ -0,0 +1,11 @@
+FROM reg:5000/base_basic
+
+WORKDIR /opt
+
+COPY pyproject.toml pdm.lock ./
+RUN pdm sync
+
+RUN pdm run python -c 'import nltk; nltk.download("punkt")'
+
+COPY env ./
+COPY scobot/** ./scobot/
\ No newline at end of file
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/Dockerfile.web  Thu Jul 11 18:15:44 2024 -0700
@@ -0,0 +1,9 @@
+FROM reg:5000/base_basic
+
+WORKDIR /opt
+
+COPY web/package.json web/pnpm-lock.yaml ./
+RUN cd web; pnpm install
+
+COPY web/vite.config.ts web/tsconfig.json ./
+COPY web/src/ ./src/
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/env  Thu Jul 11 18:15:44 2024 -0700
@@ -0,0 +1,2 @@
+PREFECT_API_URL=http://127.0.0.1:4200/api
+PREFECT_HOME=./prefect
--- a/flow/build_index.py  Thu Jul 11 17:35:31 2024 -0700
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-from pathlib import Path
-from typing import Iterable, cast
-
-import search_index
-from download import getCityMutableJson, getCityPermanent
-from local_types import MeetingRow, Url
-from lxml.html import fromstring
-from prefect import flow, task
-from prefect.logging import get_run_logger
-from search_index import SearchIndex
-
-log = None
-
-
-@task()
-def meetingListUrls() -> Iterable[Url]:
-    return [
-        "https://albanyca.primegov.com/api/v2/PublicPortal/ListArchivedMeetings?year=2024",
-        "https://albanyca.primegov.com/api/v2/PublicPortal/ListUpcomingMeetings",
-    ]
-
-
-def meetingAgendaUrl(mtg: MeetingRow) -> Url:
-    for doc in mtg.get('documentList', []):
-        if doc['templateName'] == 'HTML Agenda Packet':
-            tid = doc['templateId']
-            return f'https://albanyca.primegov.com/Portal/Meeting?meetingTemplateId={tid}'
-    raise ValueError(f"no agenda doc found for {mtg['id']=}")
-
-
-def extractMeetingText(mhtml: str) -> list[str]:
-    el = fromstring(mhtml)
-    m = el.cssselect('div#meetingSection')[0]
-    for st in m.cssselect('style'):
-        st.clear()
-    meetingText = [
-        chunk for chunk in m.xpath('.//text()', namespaces=el.nsmap)
-        if chunk.strip()
-    ]
-    return meetingText
-
-
-def addMeeting(index: SearchIndex, mtg: MeetingRow):
-    try:
-        agendaUrl = meetingAgendaUrl(mtg)
-    except ValueError:
-        pass
-    else:
-        html = getCityPermanent(agendaUrl)
-        text = extractMeetingText(html)
-        # todo group phrases
-        index.addDoc(title=f'{mtg["date"]} {mtg["title"]}', content=text)
-
-    try:
-        videoUrl = mtg['videoUrl']
-        if not videoUrl:
-            raise KeyError
-    except KeyError:
-        pass
-    else:
-        '''transcribe and index video...'''
-
-
-@flow(persist_result=True)
-def buildIndex():
-    global log
-    log = get_run_logger()
-    search_index.log = log
-
-    index = SearchIndex(Path('/tmp/scoindex'))
-    for url in meetingListUrls():
-        mtgs = cast(list[MeetingRow], getCityMutableJson(url))
-        log.info(f'got {len(mtgs)=}')
-
-        for mtg in mtgs:
-            addMeeting(index, mtg)
-        index.commit()
-
-
-if __name__ == '__main__':
-    buildIndex.serve()
--- a/flow/download.py  Thu Jul 11 17:35:31 2024 -0700
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-import datetime
-import time
-from local_types import Url
-
-import httpx
-from prefect import task
-from prefect.artifacts import create_link_artifact
-
-
-@task(
-    task_run_name=lambda: f'getHttp-{int(time.time())}',
-    cache_key_fn=lambda _, args: f'getHttp-{args["url"]}',
-    cache_expiration=datetime.timedelta(seconds=86400),
-    tags=['city'],  # todo ratelimit based on tag
-)
-def getCityMutableJson(url: Url):
-    create_link_artifact("get", url)
-    req = httpx.get(url)  # todo async
-    req.raise_for_status()
-    return req.json()
-
-
-@task(task_run_name=lambda: f'getHttp-{int(time.time())}',
-      cache_key_fn=lambda _, args: f'getHttp-{args["url"]}',
-      tags=['city'])
-def getCityPermanent(url: Url) -> str:
-    create_link_artifact("get", url)
-    req = httpx.get(url)
-    req.raise_for_status()
-    return req.text
-
-
-@task
-def getYoutubePermanent(url: str):
-    time.sleep(5)
-    return 'video' * 10000
--- a/flow/env  Thu Jul 11 17:35:31 2024 -0700
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-PREFECT_API_URL=http://127.0.0.1:4200/api
-PREFECT_HOME=./prefect
--- a/flow/local_types.py  Thu Jul 11 17:35:31 2024 -0700
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-from typing import NewType
-
-
-Url = NewType('Url', str)
-MeetingRow = NewType('MeetingRow', dict)
\ No newline at end of file
--- a/flow/pyproject.toml  Thu Jul 11 17:35:31 2024 -0700
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-[project]
-name = "flow"
-version = "0.1.0"
-description = "Default template for PDM package"
-authors = [
-    {name = "", email = ""},
-]
-dependencies = [
-    "prefect>=2.19.7",
-    "lxml>=5.2.2",
-    "httpx>=0.27.0",
-    "cssselect>=1.2.0",
-    "whoosh>=2.7.4",
-    "ipython>=8.26.0",
-]
-requires-python = "==3.11.*"
-readme = "README.md"
-license = {text = "MIT"}
-
-
-[tool.pdm]
-distribution = false
-
-[tool.pdm.scripts]
-_.env_file = "env"
-run_prefect_server = "prefect server start"
-run_build_flow = "python build_index.py"
-start_build = "prefect deployment run buildIndex/buildIndex"
\ No newline at end of file
--- a/flow/schema.py  Thu Jul 11 17:35:31 2024 -0700
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-from whoosh.fields import TEXT, Schema
-
-
-schema = Schema(title=TEXT(stored=True), content=TEXT(stored=True))
\ No newline at end of file
--- a/flow/search_index.py  Thu Jul 11 17:35:31 2024 -0700
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-from pathlib import Path
-
-from whoosh.fields import ID
-from whoosh.index import create_in
-
-from schema import schema
-
-log = None  # set by flow
-
-
-class SearchIndex:
-
-    def __init__(self, indexDir: Path):
-        indexDir.mkdir(parents=True, exist_ok=True)
-        self.ix = create_in(indexDir, schema)
-        self.writer = self.ix.writer()
-
-    def addDoc(self, **kw):
-        self.writer.add_document(**kw)
-
-    def commit(self):
-        self.writer.commit()
-        with self.ix.searcher() as searcher:
-            log.info(f'index doc count = {searcher.doc_count()}')
--- a/k8s/deploy.yaml  Thu Jul 11 17:35:31 2024 -0700
+++ b/k8s/deploy.yaml  Thu Jul 11 18:15:44 2024 -0700
@@ -26,8 +26,8 @@
             - vite
           ports:
             - containerPort: 8002
-        - name: search
-          image: reg:5000/sco_bot_search
+        - name: server
+          image: reg:5000/sco_bot_server
           workingDir: /opt
           command:
             - pdm
@@ -38,7 +38,7 @@
             - "0.0.0.0"
             - --port
             - "8001"
-            - search/query.py
+            - scobot/service/query.py
           ports:
             - containerPort: 8001
           volumeMounts:
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/scobot/index/access.py  Thu Jul 11 18:15:44 2024 -0700
@@ -0,0 +1,23 @@
+from pathlib import Path
+
+from whoosh.index import create_in
+
+from scobot.index.schema import schema
+
+log = None  # set by flow
+
+
+class SearchIndex:
+
+    def __init__(self, indexDir: Path):
+        indexDir.mkdir(parents=True, exist_ok=True)
+        self.ix = create_in(indexDir, schema)
+        self.writer = self.ix.writer()
+
+    def addDoc(self, **kw):
+        self.writer.add_document(**kw)
+
+    def commit(self):
+        self.writer.commit()
+        with self.ix.searcher() as searcher:
+            log.info(f'index doc count = {searcher.doc_count()}')
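A minimal usage sketch for the SearchIndex wrapper added above (hypothetical, not part of the changeset; field names come from scobot/index/schema.py, and the module-level log must be assigned before commit() or it will fail on log.info):

    import logging
    from pathlib import Path

    import scobot.index.access
    from scobot.index.access import SearchIndex

    scobot.index.access.log = logging.getLogger('index')  # commit() logs through this module global
    index = SearchIndex(Path('/tmp/scoindex'))  # create_in() starts a fresh index each run
    index.addDoc(sourceUrl='https://example.org/agenda',  # illustrative values only
                 sourceTitle='2024-07-11 City Council',
                 posJson='[]',
                 phrase='Call to order.')
    index.commit()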
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/scobot/index/build_index_flow.py  Thu Jul 11 18:15:44 2024 -0700
@@ -0,0 +1,105 @@
+import json
+import re
+from pathlib import Path
+from typing import Iterable, cast
+
+import lxml.html
+import nltk
+from prefect import flow, task
+from prefect.logging import get_run_logger
+
+import scobot.index.access
+from scobot.index.access import SearchIndex
+from scobot.index.download_tasks import getCityMutableJson, getCityPermanent
+from scobot.local_types import MeetingRow, Url
+
+log = None
+
+
+@task()
+def meetingListUrls() -> Iterable[Url]:
+    return [
+        "https://albanyca.primegov.com/api/v2/PublicPortal/ListArchivedMeetings?year=2024",
+        "https://albanyca.primegov.com/api/v2/PublicPortal/ListUpcomingMeetings",
+    ]
+
+
+def meetingAgendaUrl(mtg: MeetingRow) -> Url:
+    for doc in mtg.get('documentList', []):
+        if doc['templateName'] == 'HTML Agenda Packet':
+            tid = doc['templateId']
+            return f'https://albanyca.primegov.com/Portal/Meeting?meetingTemplateId={tid}'
+    raise ValueError(f"no agenda doc found for {mtg['id']=}")
+
+
+def extractMeetingText(mhtml: str) -> list[str]:
+    el = lxml.html.fromstring(mhtml)
+    m = el.cssselect('div#meetingSection')[0]
+    for st in m.cssselect('style'):
+        st.clear()
+    meetingText = [
+        chunk for chunk in m.xpath('.//text()', namespaces=el.nsmap)
+        if chunk.strip()
+    ]
+    return meetingText
+
+
+def phrasesFromFile(p: Path) -> Iterable[dict]:
+    mtg = json.loads(p.read_text())
+    print(f' has {len(mtg["phrases"])} phrases')
+    text = ' '.join(mtg['phrases'])
+
+    i = 0
+    for sentence in nltk.sent_tokenize(text):
+        sentence = re.sub(r'\s+', ' ', sentence).strip()
+        if len(sentence) < 5:
+            continue
+        if not re.search(r'\w\w\w\w\w', sentence):
+            continue
+
+        yield dict(id=f"{mtg['mtg']['id']}_sentence{i}",
+                   title=f"{mtg['mtg']['date']} {mtg['mtg']['title']}",
+                   sourceFile=str(p),
+                   posJson="[]",
+                   phrase=sentence)
+
+
+def addMeeting(index: SearchIndex, mtg: MeetingRow):
+    try:
+        agendaUrl = meetingAgendaUrl(mtg)
+    except ValueError:
+        pass
+    else:
+        html = getCityPermanent(agendaUrl)
+        text = extractMeetingText(html)
+        # todo group phrases phrasesFromFile
+        index.addDoc(title=f'{mtg["date"]} {mtg["title"]}', content=text)
+
+    try:
+        videoUrl = mtg['videoUrl']
+        if not videoUrl:
+            raise KeyError
+    except KeyError:
+        pass
+    else:
+        '''transcribe and index video...'''
+
+
+@flow(persist_result=True)
+def buildIndex():
+    global log
+    log = get_run_logger()
+    scobot.index.access.log = log
+
+    index = SearchIndex(Path('/tmp/scoindex'))
+    for url in meetingListUrls():
+        mtgs = cast(list[MeetingRow], getCityMutableJson(url))
+        log.info(f'got {len(mtgs)=}')
+
+        for mtg in mtgs:
+            addMeeting(index, mtg)
+        index.commit()
+
+
+if __name__ == '__main__':
+    buildIndex.serve()
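For a local one-shot build, the flow function can also be called directly rather than served as a deployment (a sketch, assuming the PREFECT_* settings from ./env are in effect; buildIndex.serve() above instead registers a long-running deployment that waits for triggered runs):

    from scobot.index.build_index_flow import buildIndex

    if __name__ == '__main__':
        buildIndex()  # runs the flow inline instead of serving it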
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/scobot/index/download_tasks.py  Thu Jul 11 18:15:44 2024 -0700
@@ -0,0 +1,37 @@
+import datetime
+import time
+
+import httpx
+from prefect import task
+from prefect.artifacts import create_link_artifact
+
+from scobot.local_types import Url
+
+
+@task(
+    task_run_name=lambda: f'getHttp-{int(time.time())}',
+    cache_key_fn=lambda _, args: f'getHttp-{args["url"]}',
+    cache_expiration=datetime.timedelta(seconds=86400),
+    tags=['city'],  # todo ratelimit based on tag
+)
+def getCityMutableJson(url: Url):
+    create_link_artifact("get", url)
+    req = httpx.get(url)  # todo async
+    req.raise_for_status()
+    return req.json()
+
+
+@task(task_run_name=lambda: f'getHttp-{int(time.time())}',
+      cache_key_fn=lambda _, args: f'getHttp-{args["url"]}',
+      tags=['city'])
+def getCityPermanent(url: Url) -> str:
+    create_link_artifact("get", url)
+    req = httpx.get(url)
+    req.raise_for_status()
+    return req.text
+
+
+@task
+def getYoutubePermanent(url: str):
+    time.sleep(5)
+    return 'video' * 10000
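The cache_key_fn lambdas above key each task run on its url argument, so repeated fetches of the same page are served from Prefect's result cache (for getCityMutableJson, only within the 86400-second expiration). A hypothetical flow to illustrate, not part of the changeset:

    from prefect import flow

    from scobot.index.download_tasks import getCityPermanent


    @flow
    def demoCaching():
        url = 'https://albanyca.primegov.com/Portal/Meeting?meetingTemplateId=1'  # illustrative id
        a = getCityPermanent(url)
        b = getCityPermanent(url)  # same cache key: served from cache, no second GET
        assert a == b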
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/scobot/index/schema.py  Thu Jul 11 18:15:44 2024 -0700
@@ -0,0 +1,8 @@
+from whoosh.fields import ID, TEXT, Schema
+
+schema = Schema(
+    sourceUrl=ID(stored=True),
+    sourceTitle=TEXT(stored=True),
+    posJson=ID(stored=True),
+    phrase=TEXT(stored=True),
+)
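A sketch of querying an index built with this schema, using stock Whoosh APIs (the open_dir path matches the /tmp/scoindex used by the flow; the query string is illustrative):

    from whoosh.index import open_dir
    from whoosh.qparser import QueryParser

    ix = open_dir('/tmp/scoindex')
    with ix.searcher() as searcher:
        query = QueryParser('phrase', ix.schema).parse('sidewalk repair')
        for hit in searcher.search(query, limit=10):
            print(hit['sourceTitle'], hit['phrase'])  # both fields are stored=True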
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/scobot/local_types.py  Thu Jul 11 18:15:44 2024 -0700
@@ -0,0 +1,5 @@
+from typing import NewType
+
+
+Url = NewType('Url', str)
+MeetingRow = NewType('MeetingRow', dict)
\ No newline at end of file
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/scobot/service/query.py  Thu Jul 11 18:15:44 2024 -0700
@@ -0,0 +1,70 @@
+import json
+from pathlib import Path
+
+# from pymilvus import MilvusClient
+# from milvus_model.dense.onnx import OnnxEmbeddingFunction
+from fastapi import FastAPI
+from tqdm import tqdm
+
+
+def rebuild(client, embedding_fn, dim):
+    client.drop_collection(collection_name="demo_collection")
+    if not client.has_collection(collection_name="demo_collection"):
+        client.create_collection(
+            collection_name="demo_collection",
+            dimension=dim,
+        )
+
+    docs = []
+    for i, (bbox, phrase) in tqdm(enumerate(
+            phrasesFromFile(
+                Path("data") /
+                "Meetings2226Minutes_20240702182359526 (1).pdf")),
+                                  desc="rebuilding",
+                                  unit=' phrase'):
+        [vector] = embedding_fn.encode_documents([phrase])
+        doc = {
+            "id": i,
+            "vector": vector,
+            "text": phrase,
+            "bbox": json.dumps(bbox),
+        }
+        docs.append(doc)
+    res = client.insert(collection_name="demo_collection", data=docs)
+    print('insert:', res['insert_count'])
+
+
+def xxsearch(q, embedding_fn, client):
+    query_vectors = embedding_fn.encode_queries([q])
+
+    [query_result] = client.search(
+        collection_name="demo_collection",
+        data=query_vectors,
+        limit=5,
+        output_fields=["text"],
+    )
+    query_result.sort(key=lambda x: x["distance"], reverse=True)
+
+    for row in query_result:
+        print(f'{row["distance"]:.6f} {row["entity"]["text"]}')
+
+
+# q, = sys.argv[1:]
+
+# https://huggingface.co/models?pipeline_tag=feature-extraction&library=onnx&sort=trending
+# embedding_fn = OnnxEmbeddingFunction(model_name="jinaai/jina-embeddings-v2-base-en")
+# client = MilvusClient("milvus_demo.db")
+# rebuild(client, embedding_fn, dim=embedding_fn.dim)
+# search(q, embedding_fn, client)
+
+app = FastAPI()
+
+#search = Search()
+
+
+@app.get("/sco/query")
+def read_query1(q: str):
+    results = []
+    results = search.search(q)
+
+    return {"results": results}
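A smoke test for the /sco/query endpoint above (an assumption-laden sketch: it presumes the service is reachable on port 8001 as configured in k8s/deploy.yaml, and uses httpx, already a project dependency):

    import httpx

    resp = httpx.get('http://localhost:8001/sco/query', params={'q': 'sidewalk'})
    resp.raise_for_status()
    print(resp.json()['results'])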
--- a/search/Dockerfile  Thu Jul 11 17:35:31 2024 -0700
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-FROM reg:5000/base_basic
-
-WORKDIR /opt
-
-COPY pyproject.toml pdm.lock ./
-RUN pdm sync
-
-RUN pdm run python -c 'import nltk; nltk.download("punkt")'
-
-COPY search/** ./search/
\ No newline at end of file
--- a/search/doc.py  Thu Jul 11 17:35:31 2024 -0700
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
-from dataclasses import dataclass
-
-
-@dataclass
-class Doc:
-    id: str
-    title: str
-    sourceFile: str
-    posJson: str
-    phrase: str
-
-    def __getitem__(self, k):
-        return getattr(self, k)
-
-    pop = __getitem__
--- a/search/extract_agenda.py  Thu Jul 11 17:35:31 2024 -0700
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-import json
-import re
-from pathlib import Path
-from typing import Iterable
-
-import nltk
-from doc import Doc
-
-
-def files() -> Iterable[Path]:
-    for p in Path('data/albany/meetingId').glob('*/agenda.json'):
-        print(f'file {p}')
-        yield p
-
-
-def phrasesFromFile(p: Path) -> Iterable[Doc]:
-    mtg = json.loads(p.read_text())
-    print(f' has {len(mtg["phrases"])} phrases')
-    text = ' '.join(mtg['phrases'])
-
-    i = 0
-    for sentence in nltk.sent_tokenize(text):
-        sentence = re.sub(r'\s+', ' ', sentence).strip()
-        if len(sentence) < 5:
-            continue
-        if not re.search(r'\w\w\w\w\w', sentence):
-            continue
-
-        yield Doc(id=f"{mtg['mtg']['id']}_sentence{i}",
-                  title=f"{mtg['mtg']['date']} {mtg['mtg']['title']}",
-                  sourceFile=str(p),
-                  posJson="[]",
-                  phrase=sentence)
--- a/search/extract_all.py  Thu Jul 11 17:35:31 2024 -0700
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-from typing import Iterable
-
-import extract_agenda
-import extract_pdf
-from doc import Doc
-
-
-def allDocs() -> Iterable[Doc]:
-    for mod in [
-            #extract_pdf,
-            extract_agenda
-    ]:
-        for src in mod.files():
-            yield from mod.phrasesFromFile(src)
--- a/search/extract_pdf.py  Thu Jul 11 17:35:31 2024 -0700
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-import re
-from pathlib import Path
-from typing import Iterable
-
-import nltk
-from pdfminer.converter import PDFPageAggregator
-from pdfminer.layout import LAParams, LTTextBox
-from pdfminer.pdfinterp import PDFPageInterpreter, PDFResourceManager
-from pdfminer.pdfpage import PDFPage
-
-def files() -> Iterable[Path]:
-    for p in Path('data').glob('*.pdf'):
-        yield p
-
-def phrasesFromFile(p: Path) -> Iterable:
-    fp = open(p, 'rb')
-    rsrcmgr = PDFResourceManager()
-    laparams = LAParams()
-    device = PDFPageAggregator(rsrcmgr, laparams=laparams)
-    interpreter = PDFPageInterpreter(rsrcmgr, device)
-    pages = PDFPage.get_pages(fp)
-
-    for page in pages:
-        interpreter.process_page(page)
-        layout = device.get_result()
-        for lobj in layout:
-            if isinstance(lobj, LTTextBox):
-                text = lobj.get_text()
-                for sentence in nltk.sent_tokenize(text):
-                    sentence = re.sub(r'\s+', ' ', sentence).strip()
-                    if len(sentence) < 5:
-                        continue
-                    if not re.search(r'\w\w\w\w\w', sentence):
-                        continue
-
-                    yield list(map(int, lobj.bbox)), sentence
--- a/search/query.py  Thu Jul 11 17:35:31 2024 -0700
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-from dataclasses import dataclass
-import html
-import json
-from pprint import pprint
-import sys
-from pathlib import Path
-from typing import Iterable
-
-from tqdm import tqdm
-
-from pymilvus import MilvusClient
-from milvus_model.dense.onnx import OnnxEmbeddingFunction
-
-from extract_pdf import files, phrasesFromFile
-
-from fastapi import FastAPI
-from search_apex import Search
-
-
-def rebuild(client, embedding_fn, dim):
-    client.drop_collection(collection_name="demo_collection")
-    if not client.has_collection(collection_name="demo_collection"):
-        client.create_collection(
-            collection_name="demo_collection",
-            dimension=dim,
-        )
-
-    docs = []
-    for i, (bbox, phrase) in tqdm(enumerate(
-            phrasesFromFile(
-                Path("data") /
-                "Meetings2226Minutes_20240702182359526 (1).pdf")),
-                                  desc="rebuilding",
-                                  unit=' phrase'):
-        [vector] = embedding_fn.encode_documents([phrase])
-        doc = {
-            "id": i,
-            "vector": vector,
-            "text": phrase,
-            "bbox": json.dumps(bbox),
-        }
-        docs.append(doc)
-    res = client.insert(collection_name="demo_collection", data=docs)
-    print('insert:', res['insert_count'])
-
-
-def xxsearch(q, embedding_fn, client):
-    query_vectors = embedding_fn.encode_queries([q])
-
-    [query_result] = client.search(
-        collection_name="demo_collection",
-        data=query_vectors,
-        limit=5,
-        output_fields=["text"],
-    )
-    query_result.sort(key=lambda x: x["distance"], reverse=True)
-
-    for row in query_result:
-        print(f'{row["distance"]:.6f} {row["entity"]["text"]}')
-
-
-# q, = sys.argv[1:]
-
-# https://huggingface.co/models?pipeline_tag=feature-extraction&library=onnx&sort=trending
-# embedding_fn = OnnxEmbeddingFunction(model_name="jinaai/jina-embeddings-v2-base-en")
-# client = MilvusClient("milvus_demo.db")
-# rebuild(client, embedding_fn, dim=embedding_fn.dim)
-# search(q, embedding_fn, client)
-
-app = FastAPI()
-
-search = Search()
-
-
-@app.get("/sco/query")
-def read_query1(q: str):
-    results = []
-    results = search.search(q)
-
-    return {"results": results}
--- a/search/search_apex.py  Thu Jul 11 17:35:31 2024 -0700
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-from pprint import pprint
-from typing import Iterable
-from apexsearch import ApexSearch
-
-
-class Search:
-
-    def __init__(self):
-        self.apex = ApexSearch('data/apex',
-                               tables={
-                                   "docs": {
-                                       "content": ["phrase"],
-                                       "title": "title",
-                                       "extras": ["sourceFile", "pos"],
-                                   }
-                               },
-                               id_field='id')
-
-    def rebuild(self, docs: Iterable):
-        self.apex.build_complete_index(lambda *a: docs)
-        print('rebuild complete')
-
-    def search(self, q: str):
-        res = self.apex.search(q, target_number=100)
-        pprint(res)
-        for row in res['results']:
-            yield {
-                'title': row['title'],
-                'snippetHtml': row['highlighted_content']
-            }
--- a/search/search_apex_rebuild.py  Thu Jul 11 17:35:31 2024 -0700
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-
-import subprocess
-from search_apex import Search
-from extract_all import allDocs
-
-subprocess.check_call('rm -rf data/apex', shell=True)
-subprocess.check_call('mkdir data/apex', shell=True)
-search = Search()
-search.rebuild(allDocs())
-subprocess.check_call('chmod -R a+rw data/apex', shell=True)
--- a/skaffold.yaml  Thu Jul 11 17:35:31 2024 -0700
+++ b/skaffold.yaml  Thu Jul 11 18:15:44 2024 -0700
@@ -4,22 +4,20 @@
   name: sco-bot
 build:
   artifacts:
-    - context: .
-      docker:
-        dockerfile: web/Dockerfile
+    - docker:
+        dockerfile: Dockerfile.web
       image: reg:5000/sco_bot_web
       platforms: [amd64]
       sync:
        infer:
          - src/**
-    - context: .
-      docker:
-        dockerfile: search/Dockerfile
-      image: reg:5000/sco_bot_search
+    - docker:
+        dockerfile: Dockerfile.server
+      image: reg:5000/sco_bot_server
       platforms: [amd64]
       sync:
        infer:
-          - 'search/**'
+          - 'scobot/**'
   tagPolicy:
     dateTime:
       format: 2006-01-02_15-04-05