sco-bot: webvtt_1.py comparison

start

comparison

equal deleted inserted replaced

-:82428652cda1
+:ba1ce5921a4b
"""Print the text of a WebVTT caption file as one continuous transcript.

The script reads a fixed .vtt file with ``webvtt.read``, splits every
caption's text into individual lines, drops any line that is identical to
the line immediately before it, and prints the remaining lines joined by
single spaces.
"""
import webvtt

# Earlier experiment: dump start/end/text for every caption.
# for caption in webvtt.read('data/Albany City Council - June 17, 2024 [Ywf6cOduPNI].en.vtt'):
#     print(caption.start)
#     print(caption.end)
#     print(caption.text)
vtt = webvtt.read('data/Albany City Council - June 17, 2024 [Ywf6cOduPNI].en.vtt')
# Earlier experiment: inspect the raw lines of the first caption.
# for line in vtt[0].lines:
#     print(line)
# Earlier experiment: dump only the captions inside a time window.
# for caption in vtt.iter_slice(start='00:01:11.000', end='00:02:27.000'):
#     print(caption.start)
#     print(caption.end)
#     print(caption.text)

# Gather the text of every caption as a flat list of single lines.
lines = []
for caption in vtt:
    # Strip whitespace (including newlines) from both ends of the text,
    # split it wherever a newline remains in the middle, and add the
    # resulting pieces to the list.
    lines.extend(caption.text.strip().splitlines())

# Remove repeated lines: a line is kept only when it differs from the line
# directly before it, so only *consecutive* duplicates are dropped.
# NOTE(review): presumably the captions repeat the previous line in the
# next cue (roll-up style) -- confirm against the .vtt file.
kept = []
previous = None
for line in lines:
    if line == previous:
        continue
    kept.append(line)
    previous = line

# Every kept line is preceded by a single space, so a non-empty transcript
# starts with a space. Built with one join instead of repeated "+=".
transcript = "".join(" " + line for line in kept)
print(transcript)

# Scratch notes left in the file (fragment; `lobj`, `nltk` and `re` are not
# defined or imported by this script):
# text = lobj.get_text()
# for sentence in nltk.sent_tokenize(text):
#     sentence = re.sub(r'\s+', ' ', sentence).strip()
#     if len(sentence) < 5:
#         continue

Mercurial > code > home > repos > sco-bot