Browse Source
Retrieving the flux state from the webpage. This state contains a JSON document holding all the information we need.
master
commit
55784c07bc
3 changed files with 33 additions and 0 deletions
@ -0,0 +1,5 @@
|
||||
# Package lbc.py as an executable named `scrape`, with beautifulsoup4
# supplied as a Python library.
let
  pkgs = import <nixpkgs> { };
  pythonLibs = [ pkgs.python3Packages.beautifulsoup4 ];
  scriptSource = builtins.readFile ./lbc.py;
in
pkgs.writers.writePython3Bin "scrape" { libraries = pythonLibs; } scriptSource
@ -0,0 +1,26 @@
|
||||
from bs4 import BeautifulSoup |
||||
import sys |
||||
import json |
||||
from signal import signal, SIGPIPE, SIG_DFL |
||||
|
||||
PROPS_MARKER = "window.__REDIAL_PROPS__"


def parse_redial_props(script_text):
    """Decode the JSON array assigned in a __REDIAL_PROPS__ script.

    Parsing starts at the first '[' of *script_text* and stops at the end
    of that JSON value, so trailing JavaScript (such as a ';') is ignored.

    Raises:
        ValueError: if no '[' is present or the JSON is malformed
            (json.JSONDecodeError is a ValueError subclass).
    """
    start = script_text.find('[')
    if start == -1:
        raise ValueError(
            "No JSON array found in the " + PROPS_MARKER + " script")
    state, _end = json.JSONDecoder().raw_decode(script_text, start)
    return state


def select_ads(state):
    """Return the single ``entry["data"]["ads"]`` value found in *state*.

    Entries that are not dicts, or whose "data" value is not a dict, are
    skipped instead of raising.

    Raises:
        ValueError: unless exactly one entry carries an "ads" section.
    """
    candidates = [
        entry["data"]["ads"] for entry in state
        if isinstance(entry, dict)
        and isinstance(entry.get("data"), dict)
        and "ads" in entry["data"]
    ]
    if len(candidates) != 1:
        raise ValueError("Cannot find the ads section in flux state")
    return candidates[0]


def main():
    """Read an HTML page on stdin and print its ads as JSON on stdout.

    Returns the process exit status: 0 on success, 1 when the expected
    script or ads section cannot be found (diagnostics go to stderr).
    """
    # Must be installed before anything is written to stdout, otherwise a
    # reader closing the pipe early still triggers BrokenPipeError.
    signal(SIGPIPE, SIG_DFL)

    soup = BeautifulSoup(sys.stdin.read(), "html.parser")
    scripts = []
    for tag in soup.find_all('script'):
        if tag.string is None:
            continue
        text = tag.string.strip()
        if text.startswith(PROPS_MARKER):
            scripts.append(text)

    if len(scripts) != 1:
        # Adjacent literals instead of a backslash continuation, which
        # would embed the source indentation in the message.
        print("Cannot find " + PROPS_MARKER + " script in which we are "
              "supposed to retrieve the data json", file=sys.stderr)
        return 1

    try:
        ads = select_ads(parse_redial_props(scripts[0]))
    except ValueError as err:
        print(err, file=sys.stderr)
        return 1

    print(json.dumps(ads))
    return 0


if __name__ == '__main__':
    sys.exit(main())
Loading…
Reference in new issue