diff options
author | Anton Luka Šijanec <anton@sijanec.eu> | 2024-06-04 15:40:40 +0200 |
---|---|---|
committer | Anton Luka Šijanec <anton@sijanec.eu> | 2024-06-04 15:40:40 +0200 |
commit | 1d0e0e54f663920a0992f1af5bd7d0b37eee537a (patch) | |
tree | 240a15da83a020bbd8b0ba294db20f45308a52c0 /skripti/rš_glasbena_oprema.py | |
parent | Merge branch 'master' of ssh://ni.4a.si/var/lib/git/sijanec/r (diff) | |
download | r-1d0e0e54f663920a0992f1af5bd7d0b37eee537a.tar r-1d0e0e54f663920a0992f1af5bd7d0b37eee537a.tar.gz r-1d0e0e54f663920a0992f1af5bd7d0b37eee537a.tar.bz2 r-1d0e0e54f663920a0992f1af5bd7d0b37eee537a.tar.lz r-1d0e0e54f663920a0992f1af5bd7d0b37eee537a.tar.xz r-1d0e0e54f663920a0992f1af5bd7d0b37eee537a.tar.zst r-1d0e0e54f663920a0992f1af5bd7d0b37eee537a.zip |
Diffstat (limited to '')
-rw-r--r-- | skripti/rš_glasbena_oprema.py | 31 |
1 files changed, 31 insertions, 0 deletions
#!/usr/bin/python3
"""Collect entry metadata from a radiostudent.si taxonomy feed.

Originally added as ``skripti/rš_glasbena_oprema.py``.

The module also configures an ollama client, a model name and an extraction
prompt. None of the code visible here uses them yet.
"""
import feedparser
import yt_dlp
from ollama import Client
from bs4 import BeautifulSoup

client = Client(host='http://splet.4a.si:80')
model = "llama2:13b-chat-fp16"
prompt = "The document below is text extracted from a Slovene radio station containing a tracklist. Extract the tracklist form the text below and output a CSV table in format \"artist,track name,album,duration,label\". Example output:\n\nThe Prodigy,Firestarter,The Fat of the Land,4:42,XL\nBJÖRK,LION SONG,,6:16,"


def _body_text(summary_html, link):
    """Return the text of the first ``field-name-body`` div in *summary_html*.

    Carriage returns are removed and runs of consecutive newlines are
    collapsed into a single newline.

    Raises:
        Exception: if no ``div`` carrying the ``field-name-body`` class is
            found; *link* is included in the message to identify the entry.
    """
    summary = BeautifulSoup(summary_html, features="html.parser")
    # First <div> whose class list contains "field-name-body"; divs without
    # a class attribute are skipped via the empty default.
    body = next(
        (
            div
            for div in summary.find_all("div")
            if "field-name-body" in div.get("class", [])
        ),
        None,
    )
    if body is None:
        raise Exception("body is None in " + link)
    text = body.text.replace("\r", "")
    # Repeat until no blank lines remain: a single pass leaves "\n\n" behind
    # when three or more newlines are adjacent.
    while "\n\n" in text:
        text = text.replace("\n\n", "\n")
    return text


def opreme():
    """Fetch the feed and return one metadata dict per entry.

    Each dict has the keys ``id`` (int parsed from the part of the entry id
    before the first space), ``title``, ``link``, ``published`` (the entry's
    ``published_parsed`` value) and ``authors`` (list of author names).

    Raises:
        Exception: if an entry's summary has no ``field-name-body`` div.
    """
    r = []
    feed = feedparser.parse("https://radiostudent.si/taxonomy/term/589/*/feed")
    for entry in feed.entries:
        oprema = {
            "id": int(entry.id.split(" ")[0]),
            "title": entry.title,
            "link": entry.link,
            "published": entry.published_parsed,
            "authors": [author.name for author in entry.authors],
        }
        # Still evaluated so that an entry without a body div raises, exactly
        # as before.
        # NOTE(review): the cleaned text is not stored in the record or sent
        # to the model; presumably it is meant as input for `prompt` — confirm.
        body = _body_text(entry.summary, entry.link)
        r.append(oprema)
    return r


if __name__ == "__main__":
    opreme()
\ No newline at end of file |