Skip to content

Commit a3e7dd8

Browse files
Merge branch 'main' into dependabot/pip/certifi-2025.8.3
2 parents 773923b + f73eecd commit a3e7dd8

File tree

77 files changed

+3215
-861
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+3215
-861
lines changed

.dockerignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
.DS_Store
2+
.data
23
.env
34
.venv
45
.vscode
@@ -14,8 +15,10 @@ __pycache__
1415
/media
1516
/static
1617
/test_data
18+
/tests
1719
/neodb
1820
/neodb-takahe/docs
1921
/neodb-takahe/docker
2022
/neodb-takahe/static-collected
2123
/neodb-takahe/takahe/local_settings.py
24+
/neodb-takahe/tests

.github/workflows/check.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,10 @@ jobs:
3333
uses: actions/setup-python@v5
3434
with:
3535
python-version: ${{ matrix.python-version }}
36-
cache: pip
3736
- name: Install uv
3837
uses: astral-sh/setup-uv@v6
38+
with:
39+
enable-cache: true
3940
- name: Install Dependencies
4041
run: |
4142
uv sync

.github/workflows/tests.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,23 +48,23 @@ jobs:
4848
uses: actions/setup-python@v5
4949
with:
5050
python-version: ${{ matrix.python-version }}
51-
cache: pip
5251
- name: Install uv
5352
uses: astral-sh/setup-uv@v6
53+
with:
54+
enable-cache: true
5455
- name: Install Dependencies
5556
run: |
5657
uv sync
5758
sudo apt install -y gettext
5859
- name: Run Tests
5960
env:
61+
NEODB_SECRET_KEY: test
62+
NEODB_SITE_NAME: test
63+
NEODB_SITE_DOMAIN: example.org
6064
NEODB_DB_URL: postgres://testuser:testpass@127.0.0.1/test_neodb
6165
TAKAHE_DB_URL: postgres://testuser:testpass@127.0.0.1/test_neodb_takahe
6266
NEODB_REDIS_URL: redis://127.0.0.1:6379/0
6367
NEODB_SEARCH_URL: typesense://testuser:testpass@127.0.0.1:8108/cat
64-
NEODB_SITE_NAME: test
65-
NEODB_SITE_DOMAIN: test.domain
66-
NEODB_SECRET_KEY: test
67-
SPOTIFY_API_KEY: TEST
6868
run: |
6969
uv run manage.py compilemessages -i .venv -l zh_Hans
7070
uv run pytest

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ Follow us on [Fediverse](https://mastodon.online/@neodb), [Bluesky](https://bsky
3737
* Bangumi
3838
* Board Game Geek
3939
* Archive of Our Own
40+
* WikiData
4041
* any RSS link to a podcast
4142
- Logged in users can manage their collections:
4243
+ mark an item as wishlist/in progress/complete
@@ -49,6 +50,7 @@ Follow us on [Fediverse](https://mastodon.online/@neodb), [Bluesky](https://bsky
4950
* Goodreads reading list
5051
* Letterboxd watch list
5152
* Douban archive (via [Doufen](https://doufen.org/))
53+
* Steam Library
5254
- Social features:
5355
+ view home feed with friends' activities
5456
* every activity can be set as viewable to self/follower-only/public

catalog/apps.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,6 @@ def ready(self):
1212
from journal import models as journal_models # noqa
1313

1414
# register cron jobs
15-
from catalog.jobs import DiscoverGenerator, PodcastUpdater # noqa
15+
from catalog.jobs import DiscoverGenerator, PodcastUpdater, CatalogStats # noqa
1616

1717
init_catalog_audit_log()

catalog/book/models.py

Lines changed: 21 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ def set_work(self, work: "Work | None"):
294294
if work:
295295
work.editions.add(self)
296296

297-
def merge_to(self, to_item: "Edition | None"): # type: ignore[reportIncompatibleMethodOverride]
297+
def merge_to(self, to_item):
298298
super().merge_to(to_item)
299299
if to_item:
300300
if self.merge_title():
@@ -314,45 +314,24 @@ def delete(self, *args, **kwargs):
314314
self.set_work(None)
315315
return super().delete(*args, **kwargs)
316316

317-
def update_linked_items_from_external_resource(self, resource):
318-
"""add Work from resource.metadata['work'] if not yet"""
319-
links = resource.required_resources + resource.related_resources
320-
for w in links:
321-
if w.get("model") == "Work":
322-
work_res = ExternalResource.objects.filter(
323-
id_type=w["id_type"], id_value=w["id_value"]
324-
).first()
325-
if work_res:
326-
work = work_res.item
327-
if not work:
328-
logger.warning(f"Unable to find work for {work_res}")
329-
else:
330-
logger.warning(
331-
f"Unable to find resource for {w['id_type']}:{w['id_value']}"
332-
)
333-
work = Work.objects.filter(
334-
primary_lookup_id_type=w["id_type"],
335-
primary_lookup_id_value=w["id_value"],
336-
).first()
337-
if work:
338-
w = self.get_work()
339-
if w:
340-
if w != work:
341-
w.log_action(
342-
{"!link_and_merge": [str(self), str(resource)]}
343-
)
344-
logger.info(
345-
f"Merging {work} to {w} when fetching from {resource}"
346-
)
347-
work.merge_to(w)
348-
else:
349-
self.set_work(work)
350-
351-
def merge_data_from_external_resource(
352-
self, p: "ExternalResource", ignore_existing_content: bool = False
353-
):
354-
super().merge_data_from_external_resource(p, ignore_existing_content)
355-
self.merge_title()
317+
def process_fetched_item(self, fetched, link_type):
318+
if link_type == ExternalResource.LinkType.PARENT and isinstance(fetched, Work):
319+
w = self.get_work()
320+
if w:
321+
if w == fetched:
322+
return False
323+
w.log_action({"!merge_on_fetch": [str(self), str(fetched)]})
324+
logger.info(f"Merging {fetched} to {w} when fetched {self}.")
325+
fetched.merge_to(w)
326+
else:
327+
self.set_work(fetched)
328+
return True
329+
return False
330+
331+
def normalize_metadata(self, override_resources=None):
    """Normalize metadata via the parent class, then merge extra titles.

    Args:
        override_resources: passed through to the parent implementation.
            ``None`` (the default) is replaced by a fresh empty list so that
            no list object is shared between calls.

    Returns:
        The parent's result OR-ed with the result of ``merge_title()``.
    """
    if override_resources is None:
        # A literal ``[]`` default would be one shared object for every call.
        override_resources = []
    r = super().normalize_metadata(override_resources)
    r |= self.merge_title()
    return r
356335

357336
def merge_title(self) -> bool:
358337
# Edition should have only one title, so extra titles will be merged to other_title, return True if updated
@@ -491,13 +470,13 @@ def lookup_id_type_choices(cls):
491470
]
492471
return [(i.value, i.label) for i in id_types]
493472

494-
def merge_to(self, to_item: "Work | None"): # type: ignore[reportIncompatibleMethodOverride]
473+
def merge_to(self, to_item):
495474
super().merge_to(to_item)
496475
if not to_item:
497476
return
498477
for edition in self.editions.all():
499478
edition.set_work(to_item)
500-
to_item.language = uniq(to_item.language + self.language) # type: ignore
479+
to_item.language = uniq(to_item.language + self.language)
501480
to_item.localized_title = uniq(to_item.localized_title + self.localized_title)
502481
to_item.save()
503482

@@ -515,29 +494,6 @@ def cover_image_url(self):
515494
e = next(filter(lambda e: e.cover_image_url, self.editions.all()), None)
516495
return e.cover_image_url if e else None
517496

518-
def update_linked_items_from_external_resource(self, resource):
519-
"""add Edition from resource.metadata['required_resources'] if not yet"""
520-
links = resource.required_resources + resource.related_resources
521-
for e in links:
522-
if e.get("model") == "Edition":
523-
edition_res = ExternalResource.objects.filter(
524-
id_type=e["id_type"], id_value=e["id_value"]
525-
).first()
526-
if edition_res:
527-
edition = edition_res.item
528-
if not edition:
529-
logger.warning(f"Unable to find edition for {edition_res}")
530-
else:
531-
logger.warning(
532-
f"Unable to find resource for {e['id_type']}:{e['id_value']}"
533-
)
534-
edition = Edition.objects.filter(
535-
primary_lookup_id_type=e["id_type"],
536-
primary_lookup_id_value=e["id_value"],
537-
).first()
538-
if edition:
539-
edition.set_work(self)
540-
541497
def to_indexable_doc(self):
    """Return an empty document: Work is not indexed, for now."""
    return dict()
543499

catalog/common/downloaders.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ def get_mock_mode():
4747

4848
def get_mock_file(url):
    """Derive the local mock-response file name for ``url``.

    The substitutions run in this order:
    1. the literal ``***REMOVED***`` becomes ``1234``
       (presumably a masked secret in CI logs; see the original
       "Thank you, Github Action" remark);
    2. any ``key=<value>`` fragment becomes the fixed token ``key_8964``;
    3. every remaining non-word character becomes ``_``.

    The result is cut to at most 255 characters.
    """
    name = url.replace("***REMOVED***", "1234")  # Thank you, Github Action -_-!
    substitutions = (
        (r"key=[*A-Za-z0-9_\-]+", "key_8964"),
        (r"[^\w]", "_"),
    )
    for pattern, replacement in substitutions:
        name = re.sub(pattern, replacement, name)
    # Slicing is a no-op for names already within the limit.
    return name[:255]
@@ -70,7 +70,8 @@ def __init__(self, url):
7070
except Exception:
7171
self.content = b"Error: response file not found"
7272
self.status_code = 404
73-
logger.debug(f"local response not found for {url} at {fn}")
73+
if ".jpg" not in self.url:
74+
logger.warning(f"local response not found for {url} at {fn}")
7475

7576
@property
7677
def text(self):
@@ -89,9 +90,7 @@ def xml(self):
8990

9091
@property
def headers(self):
    """Minimal response headers for the mocked response.

    Content-Type is ``image/jpeg`` when ``.jpg`` occurs anywhere in
    ``self.url`` and ``text/html`` otherwise.
    """
    if ".jpg" in self.url:
        content_type = "image/jpeg"
    else:
        content_type = "text/html"
    return {"Content-Type": content_type}
9594

9695

9796
class DownloaderResponse(Response):

catalog/common/migrations.py

Lines changed: 67 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
1-
import logging
1+
from time import sleep
22

33
from django.db import connection, models
4+
from loguru import logger
45
from tqdm import tqdm
56

6-
logger = logging.getLogger(__name__)
7-
87

98
def fix_20250208():
109
logger.warning("Fixing soft-deleted editions...")
@@ -139,3 +138,68 @@ def normalize_language_20250524():
139138
i.save(update_fields=["metadata"])
140139
u += 1
141140
logger.warning(f"normalize_language finished. {u} of {c} items updated.")
141+
142+
143+
def link_tmdb_wikidata_20250815(limit=None):
    """
    Refetch TMDB Movie/TVShow resources and ready the WikiData resource each one references.

    Steps:
    1. Select ExternalResources whose id_type is TMDB_Movie or TMDB_TV
       (only the first ``limit`` of them when ``limit`` is truthy).
    2. Scrape each resource again through its site class.
    3. When the scraped content carries a WikiData lookup id, store the new
       content on the TMDB resource and call ``get_resource_ready()`` on the
       corresponding WikiData site (presumably this is what links both
       resources to the same Item -- confirm against the site code).
    4. Pause half a second after each WikiData attempt.

    Failures are logged and counted; they never abort the run.

    Returns:
        dict with the keys ``total``, ``with_wikidata`` and ``errors``.
    """
    from catalog.common import IdType, SiteManager
    from catalog.common.models import ExternalResource
    from catalog.sites.wikidata import WikiData

    logger.warning("Starting TMDB-WikiData linking process")
    candidates = ExternalResource.objects.filter(
        id_type__in=[IdType.TMDB_Movie, IdType.TMDB_TV]
    )
    if limit:
        candidates = candidates[:limit]
    total = candidates.count()
    linked = 0
    failed = 0
    logger.warning(f"Found {total} TMDB resources to process")

    for res in tqdm(candidates, total=total):
        # Outer guard: any unexpected failure on one resource is counted
        # and the loop moves on to the next one.
        try:
            site_cls = SiteManager.get_site_cls_by_id_type(res.id_type)
            if not site_cls:
                logger.error(f"Could not find site class for {res.id_type}")
                failed += 1
                continue

            site = site_cls(res.url)
            try:
                fetched = site.scrape()
            except Exception as e:
                logger.error(f"Failed to scrape {res.url}: {e}")
                failed += 1
                continue

            wikidata_id = fetched.lookup_ids.get(IdType.WikiData)
            if wikidata_id:
                # Persist the refreshed content only when there is a WikiData id.
                res.update_content(fetched)
                linked += 1
                wiki_site = WikiData(id_value=wikidata_id)
                try:
                    wiki_site.get_resource_ready()
                    logger.success(f"Linked WikiData {wiki_site} to {site}")
                except Exception as e:
                    logger.error(f"Failed to process WikiData {wikidata_id}: {e}")
                    failed += 1
                sleep(0.5)
        except Exception as e:
            logger.error(f"Error processing resource {res}: {e}")
            failed += 1

    logger.warning("TMDB-WikiData linking process completed:")
    logger.warning(f"  Total TMDB resources processed: {total}")
    logger.warning(f"  TMDB resources with WikiData IDs: {linked}")
    logger.warning(f"  Errors encountered: {failed}")
    return {
        "total": total,
        "with_wikidata": linked,
        "errors": failed,
    }

0 commit comments

Comments
 (0)