Skip to content

Commit 51cf8de

Browse files
bump dependencies and python version
1 parent cb327e3 commit 51cf8de

File tree

3 files changed

+1013
-717
lines changed

3 files changed

+1013
-717
lines changed

findpapers/tools/refiner_tool.py

Lines changed: 129 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,15 @@
22
import re
33
import os
44
from typing import Optional, List
5-
from colorama import Fore, Back, Style, init
6-
from findpapers.models.search import Search
5+
from termcolor import colored
76
from findpapers.models.paper import Paper
87
import findpapers.utils.common_util as common_util
98
import findpapers.utils.persistence_util as persistence_util
109

1110

12-
def _print_paper_details(paper: Paper, highlights: List[str], show_abstract: bool, show_extra_info: bool): # pragma: no cover
11+
def _print_paper_details(
12+
paper: Paper, highlights: List[str], show_abstract: bool, show_extra_info: bool
13+
): # pragma: no cover
1314
"""
1415
Private method used to print on console the paper details
1516
@@ -25,66 +26,110 @@ def _print_paper_details(paper: Paper, highlights: List[str], show_abstract: boo
2526
A flag to indicate if the paper's extra info should be shown or not, by default False
2627
"""
2728

28-
print(f"{Fore.GREEN}{Style.BRIGHT}Title:{Style.NORMAL} {paper.title}")
29-
print(f"{Fore.GREEN}{Style.BRIGHT}Authors:{Style.NORMAL} {' | '.join(paper.authors)}")
29+
print(f"{colored('Title:', 'green', attrs=['bold'])} {paper.title}")
30+
print(f"{colored('Authors:', 'green', attrs=['bold'])} {' | '.join(paper.authors)}")
3031
if len(paper.keywords) > 0:
31-
print(f"{Fore.GREEN}{Style.BRIGHT}Keywords:{Style.NORMAL} {', '.join(paper.keywords)}")
32-
print(f"{Fore.GREEN}{Style.BRIGHT}Publication date:{Style.NORMAL} {paper.publication_date.strftime('%Y-%m-%d')}")
32+
print(
33+
f"{colored('Keywords:', 'green', attrs=['bold'])} {', '.join(paper.keywords)}"
34+
)
35+
print(
36+
f"{colored('Publication date:', 'green', attrs=['bold'])} {paper.publication_date.strftime('%Y-%m-%d')}"
37+
)
3338

3439
print("\n")
3540

3641
if show_abstract:
3742
abstract = paper.abstract
3843
for term in highlights:
39-
abstract = re.sub(r"({0}+)".format(term), Fore.YELLOW + Style.BRIGHT +
40-
r"\1" + Fore.RESET + Style.NORMAL, abstract, flags=re.IGNORECASE)
44+
abstract = re.sub(
45+
r"({})".format(
46+
re.escape(term)
47+
), # Use re.escape(term) for safety with special regex characters
48+
colored(r"\1", "yellow", attrs=["bold"]),
49+
abstract,
50+
flags=re.IGNORECASE,
51+
)
4152
print(abstract)
4253

4354
print("\n")
4455

45-
if show_extra_info:
46-
if paper.comments is not None:
47-
print(f"{Style.BRIGHT}Paper comments:{Style.NORMAL} {paper.comments}")
48-
if paper.citations is not None:
49-
print(f"{Style.BRIGHT}Paper citations:{Style.NORMAL} {paper.citations}")
50-
if paper.number_of_pages is not None:
51-
print(f"{Style.BRIGHT}Paper number of pages:{Style.NORMAL} {paper.number_of_pages}")
52-
if paper.doi is not None:
53-
print(f"{Style.BRIGHT}Paper DOI:{Style.NORMAL} {paper.doi}")
54-
if paper.databases is not None:
55-
print(f"{Style.BRIGHT}Paper found in:{Style.NORMAL} {', '.join(paper.databases)}")
56-
if len(paper.urls) > 0:
57-
print(f"{Style.BRIGHT}Paper URL:{Style.NORMAL} {list(paper.urls)[0]}")
58-
59-
if paper.publication is not None:
60-
print(f"{Style.BRIGHT}Publication name:{Style.NORMAL} {paper.publication.title}")
61-
print(f"{Style.BRIGHT}Publication is potentially predatory:{Style.NORMAL} {paper.publication.is_potentially_predatory}")
62-
if paper.publication.category is not None:
63-
print(f"{Style.BRIGHT}Publication category:{Style.NORMAL} {paper.publication.category}")
64-
if len(paper.publication.subject_areas) > 0:
65-
print(f"{Style.BRIGHT}Publication areas:{Style.NORMAL} {', '.join(paper.publication.subject_areas)}")
66-
if paper.publication.isbn is not None:
67-
print(f"{Style.BRIGHT}Publication ISBN:{Style.NORMAL} {paper.publication.isbn}")
68-
if paper.publication.issn is not None:
69-
print(f"{Style.BRIGHT}Publication ISSN:{Style.NORMAL} {paper.publication.issn}")
70-
if paper.publication.publisher is not None:
71-
print(f"{Style.BRIGHT}Publication publisher:{Style.NORMAL} {paper.publication.publisher}")
72-
if paper.publication.cite_score is not None:
73-
print(f"{Style.BRIGHT}Publication Cite Score:{Style.NORMAL} {paper.publication.cite_score}")
74-
if paper.publication.sjr is not None:
75-
print(f"{Style.BRIGHT}Publication SJR:{Style.NORMAL} {paper.publication.sjr}")
76-
if paper.publication.snip is not None:
77-
print(f"{Style.BRIGHT}Publication SNIP:{Style.NORMAL} {paper.publication.snip}")
56+
if show_extra_info:
57+
if paper.comments is not None:
58+
print(f"{colored('Paper comments:', attrs=['bold'])} {paper.comments}")
59+
if paper.citations is not None:
60+
print(
61+
f"{colored('Paper citations:', attrs=['bold'])} {paper.citations}"
62+
)
63+
if paper.number_of_pages is not None:
64+
print(
65+
f"{colored('Paper number of pages:', attrs=['bold'])} {paper.number_of_pages}"
66+
)
67+
if paper.doi is not None:
68+
print(f"{colored('Paper DOI:', attrs=['bold'])} {paper.doi}")
69+
if paper.databases is not None:
70+
print(
71+
f"{colored('Paper found in:', attrs=['bold'])} {', '.join(paper.databases)}"
72+
)
73+
if len(paper.urls) > 0:
74+
print(f"{colored('Paper URL:', attrs=['bold'])} {list(paper.urls)[0]}")
75+
76+
if paper.publication is not None:
77+
print(
78+
f"{colored('Publication name:', attrs=['bold'])} {paper.publication.title}"
79+
)
80+
print(
81+
f"{colored('Publication is potentially predatory:', attrs=['bold'])} {paper.publication.is_potentially_predatory}"
82+
)
83+
if paper.publication.category is not None:
84+
print(
85+
f"{colored('Publication category:', attrs=['bold'])} {paper.publication.category}"
86+
)
87+
if len(paper.publication.subject_areas) > 0:
88+
print(
89+
f"{colored('Publication areas:', attrs=['bold'])} {', '.join(paper.publication.subject_areas)}"
90+
)
91+
if paper.publication.isbn is not None:
92+
print(
93+
f"{colored('Publication ISBN:', attrs=['bold'])} {paper.publication.isbn}"
94+
)
95+
if paper.publication.issn is not None:
96+
print(
97+
f"{colored('Publication ISSN:', attrs=['bold'])} {paper.publication.issn}"
98+
)
99+
if paper.publication.publisher is not None:
100+
print(
101+
f"{colored('Publication publisher:', attrs=['bold'])} {paper.publication.publisher}"
102+
)
103+
if paper.publication.cite_score is not None:
104+
print(
105+
f"{colored('Publication Cite Score:', attrs=['bold'])} {paper.publication.cite_score}"
106+
)
107+
if paper.publication.sjr is not None:
108+
print(
109+
f"{colored('Publication SJR:', attrs=['bold'])} {paper.publication.sjr}"
110+
)
111+
if paper.publication.snip is not None:
112+
print(
113+
f"{colored('Publication SNIP:', attrs=['bold'])} {paper.publication.snip}"
114+
)
78115

79116
print("\n")
80117

81-
if paper.selected is not None:
82-
83-
print(f"{Fore.BLUE}{Style.BRIGHT}Selected: {Style.NORMAL}{'Yes' if paper.selected else 'No'}")
84-
85-
if paper.categories is not None and len(paper.categories.items()) > 0:
86-
categories_string = " | ".join([f"{k}: {', '.join(v)}" for k, v in paper.categories.items() if len(v) > 0])
87-
print(f"{Fore.BLUE}{Style.BRIGHT}Categories: {Style.NORMAL}{categories_string}")
118+
if paper.selected is not None:
119+
selected_text = "Yes" if paper.selected else "No"
120+
print(f"{colored('Selected:', 'blue', attrs=['bold'])} {selected_text}")
121+
122+
if paper.categories is not None and len(paper.categories.items()) > 0:
123+
categories_string = " | ".join(
124+
[
125+
f"{k}: {', '.join(v)}"
126+
for k, v in paper.categories.items()
127+
if len(v) > 0
128+
]
129+
)
130+
print(
131+
f"{colored('Categories:', 'blue', attrs=['bold'])} {categories_string}"
132+
)
88133

89134
print("\n")
90135

@@ -99,22 +144,19 @@ def _get_select_question_input(): # pragma: no cover
99144
User provided input
100145
"""
101146
questions = [
102-
inquirer.List("answer",
103-
message="Do you wanna select this paper?",
104-
choices=[
105-
"Skip",
106-
"No",
107-
"Yes",
108-
"Save what I've done so far and leave"],
109-
),
147+
inquirer.List(
148+
"answer",
149+
message="Do you wanna select this paper?",
150+
choices=["Skip", "No", "Yes", "Save what I've done so far and leave"],
151+
),
110152
]
111153
return inquirer.prompt(questions).get("answer")
112154

113155

114156
def _get_category_question_input(categories: dict): # pragma: no cover
115157
"""
116158
Private method that prompts a question about the paper category
117-
159+
118160
Parameters
119161
----------
120162
categories : dict
@@ -129,29 +171,37 @@ def _get_category_question_input(categories: dict): # pragma: no cover
129171
selections = {}
130172

131173
for facet, facet_categories in categories.items():
132-
133174
questions = [
134-
inquirer.Checkbox("answer",
135-
message=f"With respect to \"{facet}\", which categories does the document belong to?",
136-
choices=facet_categories,
137-
),
175+
inquirer.Checkbox(
176+
"answer",
177+
message=f'With respect to "{facet}", which categories does the document belong to?',
178+
choices=facet_categories,
179+
),
138180
]
139181

140182
answers = inquirer.prompt(questions)
141183

142184
selections[facet] = answers.get("answer")
143-
185+
144186
return selections
145187

146188

147-
def refine(search_path: str, categories: Optional[dict] = None, highlights: Optional[list] = None, show_abstract: Optional[bool] = False,
148-
show_extra_info: Optional[bool] = False, only_selected_papers: Optional[bool] = False, only_removed_papers: Optional[bool] = False,
149-
read_only: Optional[bool] = False, verbose: Optional[bool] = False):
189+
def refine(
190+
search_path: str,
191+
categories: Optional[dict] = None,
192+
highlights: Optional[list] = None,
193+
show_abstract: Optional[bool] = False,
194+
show_extra_info: Optional[bool] = False,
195+
only_selected_papers: Optional[bool] = False,
196+
only_removed_papers: Optional[bool] = False,
197+
read_only: Optional[bool] = False,
198+
verbose: Optional[bool] = False,
199+
):
150200
"""
151201
When you have a search result and wanna refine it, this is the method that you'll need to call.
152-
This method will iterate through all the papers showing their collected data,
202+
This method will iterate through all the papers showing their collected data,
153203
then asking if you wanna select a particular paper or not, and assign a category if a list of categories is provided.
154-
And to help you on the refinement, this method can also highlight some terms on the paper's abstract by a provided list of them
204+
And to help you on the refinement, this method can also highlight some terms on the paper's abstract by a provided list of them
155205
156206
Parameters
157207
----------
@@ -187,8 +237,6 @@ def refine(search_path: str, categories: Optional[dict] = None, highlights: Opti
187237
common_util.logging_initialize(verbose)
188238
common_util.check_write_access(search_path)
189239

190-
init(autoreset=True) # colorama initializer
191-
192240
if categories is None:
193241
categories = {}
194242
if highlights is None:
@@ -201,14 +249,17 @@ def refine(search_path: str, categories: Optional[dict] = None, highlights: Opti
201249
if paper.selected is not None:
202250
has_already_refined_papers = True
203251
break
204-
252+
205253
todo_papers = []
206254
done_papers = []
207255

208256
for paper in search.papers:
209-
#if wanna_re_refine_papers:
210-
if (only_selected_papers or only_removed_papers):
211-
if paper.selected is not None and ((only_selected_papers and paper.selected) or (only_removed_papers and not paper.selected)):
257+
# if wanna_re_refine_papers:
258+
if only_selected_papers or only_removed_papers:
259+
if paper.selected is not None and (
260+
(only_selected_papers and paper.selected)
261+
or (only_removed_papers and not paper.selected)
262+
):
212263
todo_papers.append(paper)
213264
else:
214265
if paper.selected is None or read_only:
@@ -219,16 +270,14 @@ def refine(search_path: str, categories: Optional[dict] = None, highlights: Opti
219270
todo_papers = sorted(todo_papers, key=lambda x: x.publication_date, reverse=True)
220271

221272
for i, paper in enumerate(todo_papers):
222-
223273
print(f"\n{'.' * os.get_terminal_size()[0]}\n")
224274

225275
if not read_only:
226-
print(f"\n{Fore.CYAN}{i+1}/{len(todo_papers)} papers\n")
276+
print(f"\n{colored(f'{i + 1}/{len(todo_papers)} papers', 'cyan')}\n")
227277

228278
_print_paper_details(paper, highlights, show_abstract, show_extra_info)
229279

230280
if not read_only:
231-
232281
answer = _get_select_question_input()
233282

234283
if answer == "Skip":
@@ -242,10 +291,10 @@ def refine(search_path: str, categories: Optional[dict] = None, highlights: Opti
242291

243292
if paper.selected:
244293
paper.categories = _get_category_question_input(categories)
245-
294+
246295
done_papers.append(paper)
247296

248297
if read_only:
249-
print(f"\n{Fore.CYAN}{len(todo_papers)} papers\n")
298+
print(f"\n{colored(f'{len(todo_papers)} papers', 'cyan')}\n")
250299
else:
251300
persistence_util.save(search, search_path)

0 commit comments

Comments
 (0)