From 9eefd479c60d13bfbd0e06777823bda897830e80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sascha=20I=C3=9Fbr=C3=BCcker?=
Date: Sat, 29 Jun 2019 08:42:54 +0200
Subject: [PATCH] Implement basic importer

---
 .gitignore                                   |  1 +
 .idea/inspectionProfiles/Project_Default.xml | 12 ++++
 .../management/commands/import_netscape.py   | 37 +++++++++++
 bookmarks/services/importer.py               | 64 +++++++++++++++++++
 4 files changed, 114 insertions(+)
 create mode 100644 .idea/inspectionProfiles/Project_Default.xml
 create mode 100644 bookmarks/management/commands/import_netscape.py
 create mode 100644 bookmarks/services/importer.py

diff --git a/.gitignore b/.gitignore
index 459b73b..620cdae 100644
--- a/.gitignore
+++ b/.gitignore
@@ -159,3 +159,4 @@ venv.bak/
 .mypy_cache/
 
 polls
+tmp
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..cbb35d0
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,12 @@
+
+
+
+
\ No newline at end of file
diff --git a/bookmarks/management/commands/import_netscape.py b/bookmarks/management/commands/import_netscape.py
new file mode 100644
index 0000000..5691584
--- /dev/null
+++ b/bookmarks/management/commands/import_netscape.py
@@ -0,0 +1,37 @@
+from django.contrib.auth.models import User
+from django.core.management.base import BaseCommand, CommandError
+
+from bookmarks.services.importer import import_netscape_html
+
+
+class Command(BaseCommand):
+    """Import bookmarks from a Netscape HTML bookmark file for one user."""
+
+    help = 'Import Netscape HTML bookmark file'
+
+    def add_arguments(self, parser):
+        """Register the positional ``file`` and ``user`` arguments."""
+        parser.add_argument('file', type=str, help='Path to file')
+        parser.add_argument('user', type=str, help='Name of the user for which to import')
+
+    def handle(self, *args, **kwargs):
+        """Read the bookmark file and import it for the named user.
+
+        Raises:
+            CommandError: If no user with the given name exists.
+        """
+        filepath = kwargs['file']
+        username = kwargs['user']
+
+        # Fail with a readable message instead of a traceback for an unknown user
+        try:
+            user = User.objects.get(username=username)
+        except User.DoesNotExist as error:
+            raise CommandError('User "%s" does not exist' % username) from error
+
+        # Netscape bookmark exports are normally UTF-8; do not rely on the
+        # platform default encoding
+        with open(filepath, encoding='utf-8') as html_file:
+            html = html_file.read()
+
+        import_netscape_html(html, user)
diff --git a/bookmarks/services/importer.py b/bookmarks/services/importer.py
new file mode 100644
index 0000000..d39d9b6
--- /dev/null
+++ b/bookmarks/services/importer.py
@@ -0,0 +1,64 @@
+from datetime import datetime
+
+from bs4 import BeautifulSoup, Tag
+from django.contrib.auth.models import User
+
+from bookmarks.models import Bookmark
+
+
+def import_netscape_html(html: str, user: User):
+    """Import every bookmark found in a Netscape HTML bookmark document.
+
+    Each ``<dt>`` element that contains a link is imported; a bookmark that
+    already exists for the same URL and user is updated instead of duplicated.
+    """
+    soup = BeautifulSoup(html, 'html.parser')
+
+    bookmark_tags = soup.find_all('dt')
+
+    for bookmark_tag in bookmark_tags:
+        _import_bookmark_tag(bookmark_tag, user)
+
+
+def _import_bookmark_tag(bookmark_tag: Tag, user: User):
+    """Create or update the bookmark described by one ``<dt>`` element.
+
+    Elements without a link, or whose link has no ``href``, are skipped.
+    """
+    link_tag = bookmark_tag.a
+
+    if link_tag is None:
+        return
+
+    url = link_tag.get('href')
+    if not url:
+        return
+
+    # Either modify existing bookmark for the URL or create new one
+    bookmark = _get_or_create_bookmark(url, user)
+
+    # ADD_DATE and TOREAD may be absent from an export; fall back to the
+    # current time / "not unread" instead of aborting with a KeyError
+    add_date = link_tag.get('add_date')
+    if add_date:
+        date_added = datetime.utcfromtimestamp(int(add_date))
+    else:
+        date_added = datetime.utcnow()
+
+    bookmark.url = url
+    bookmark.date_added = date_added
+    bookmark.date_modified = bookmark.date_added
+    bookmark.unread = link_tag.get('toread') == '1'
+    # .string is None when the link text is empty or mixed with nested tags
+    bookmark.title = link_tag.get_text()
+    bookmark.owner = user
+
+    bookmark.save()
+
+
+def _get_or_create_bookmark(url: str, user: User):
+    """Return the user's stored bookmark for ``url``, or a new unsaved one."""
+    try:
+        return Bookmark.objects.get(url=url, owner=user)
+    except Bookmark.DoesNotExist:
+        return Bookmark()