From 451a049d465a188b2bc0e4a5c66c4881c7c3af47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sascha=20I=C3=9Fbr=C3=BCcker?= Date: Sat, 29 Jun 2019 02:01:26 +0200 Subject: [PATCH] Implement fetching website metadata --- bookmarks/migrations/0001_initial.py | 6 +++--- bookmarks/models.py | 4 ++-- bookmarks/services/bookmarks.py | 25 +++++++++++++++++++++---- requirements.txt | 8 ++++++++ 4 files changed, 34 insertions(+), 9 deletions(-) diff --git a/bookmarks/migrations/0001_initial.py b/bookmarks/migrations/0001_initial.py index 64074e8..a82f3f5 100644 --- a/bookmarks/migrations/0001_initial.py +++ b/bookmarks/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 2.2.2 on 2019-06-28 22:16 +# Generated by Django 2.2.2 on 2019-06-28 23:49 from django.conf import settings from django.db import migrations, models @@ -21,8 +21,8 @@ class Migration(migrations.Migration): ('url', models.URLField()), ('title', models.CharField(max_length=512)), ('description', models.TextField()), - ('website_title', models.CharField(max_length=512)), - ('website_description', models.TextField()), + ('website_title', models.CharField(blank=True, max_length=512, null=True)), + ('website_description', models.TextField(blank=True, null=True)), ('unread', models.BooleanField(default=True)), ('date_added', models.DateTimeField()), ('date_modified', models.DateTimeField()), diff --git a/bookmarks/models.py b/bookmarks/models.py index ecca4b4..748c72e 100644 --- a/bookmarks/models.py +++ b/bookmarks/models.py @@ -7,8 +7,8 @@ class Bookmark(models.Model): url = models.URLField() title = models.CharField(max_length=512) description = models.TextField() - website_title = models.CharField(max_length=512) - website_description = models.TextField() + website_title = models.CharField(max_length=512, blank=True, null=True) + website_description = models.TextField(blank=True, null=True) unread = models.BooleanField(default=True) date_added = models.DateTimeField() date_modified = models.DateTimeField() diff --git a/bookmarks/services/bookmarks.py b/bookmarks/services/bookmarks.py index df40ac7..8817a7b 100644 --- a/bookmarks/services/bookmarks.py +++ b/bookmarks/services/bookmarks.py @@ -1,3 +1,5 @@ +import requests +from bs4 import BeautifulSoup from django.contrib.auth.models import User from django.utils import timezone @@ -24,7 +26,22 @@ def update_bookmark(bookmark: Bookmark): def _update_website_metadata(bookmark: Bookmark): - # TODO: Load website metadata - bookmark.website_title = 'Title from website' - bookmark.website_description = 'Description from website' - pass + # noinspection PyBroadException + try: + page_text = load_page(bookmark.url) + soup = BeautifulSoup(page_text, 'html.parser') + + title = soup.title.string if soup.title is not None else None + description_tag = soup.find('meta', attrs={'name': 'description'}) + description = description_tag['content'] if description_tag is not None else None + + bookmark.website_title = title + bookmark.website_description = description + except Exception: + bookmark.website_title = None + bookmark.website_description = None + + +def load_page(url: str): + r = requests.get(url) + return r.text diff --git a/requirements.txt b/requirements.txt index 111e4d9..4a8de07 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,11 @@ +beautifulsoup4==4.7.1 +certifi==2019.6.16 +chardet==3.0.4 Django==2.2.2 +django-picklefield==2.0 +idna==2.8 pytz==2019.1 +requests==2.22.0 +soupsieve==1.9.2 sqlparse==0.3.0 +urllib3==1.25.3