Skip to content

Commit ad337aa

Browse files
committed
hxl2example: now example also allow expose API via web (uses hug)
1 parent 03b1edf commit ad337aa

File tree

4 files changed

+138
-7
lines changed

4 files changed

+138
-7
lines changed

README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,34 @@ license etc.
6363

6464
What it does: `hxl2example` accepts one HXLated dataset and saves it as .CSV.
6565

66+
**Quick examples**
67+
68+
```bash
69+
### Basic examples
70+
71+
# This will output a local file to stdout (tip: you can disable local files)
72+
hxl2example tests/files/iris_hxlated-csv.csv
73+
74+
# This will save to a local file
75+
hxl2example tests/files/iris_hxlated-csv.csv my-local-file.example
76+
77+
# Since we use libhxl-python, remote HXLated URLs work too!
78+
hxl2example https://docs.google.com/spreadsheets/d/1En9FlmM8PrbTWgl3UHPF_MXnJ6ziVZFhBbojSJzBdLI/edit#gid=319251406
79+
80+
### Advanced usage (if you need to share work with others)
81+
82+
## Quick ad-hoc web proxy, local usage
83+
# @see https://github.com/hugapi/hug
84+
85+
hug -f bin/hxl2example
86+
# http://localhost:8000/ will show a JSON documentation of hug endpoints. TL;DR:
87+
# http://localhost:8000/hxl2example.csv?source_url=http://example.com/remote-file.csv
88+
89+
## Expose local web proxy to others
90+
# @see https://ngrok.com/
91+
ngrok http 8000
92+
```
93+
6694
##### 1.2.2 `hxl2tab`: tab format, focused for compatibility with Orange Data Mining
6795
- Main issue: <https://github.com/EticaAI/HXL-Data-Science-file-formats/issues/2>
6896
- Orange File Specification: <https://orange-data-mining-library.readthedocs.io/en/latest/reference/data.io.html>

bin/hxl2example

Lines changed: 92 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,26 @@
55
#
66
# USAGE: hxl2example hxlated-data.hxl my-exported-file.example
77
# cat hxlated-data.hxl | hxl2example > my-exported-file.example
8+
# # Via web, in two different terminals, do it
9+
# hug -f bin/hxl2example
10+
# ngrok http 8000
811
#
912
# DESCRIPTION: hxl2example is an example script to create other scripts with
1013
# some bare minimum command line interface that could work.
1114
# With exception of external libraries, the hxl2example is
1215
# meant to be somewhat self-contained one-file executable ready
13-
# to just be added to the path
16+
# to just be added to the path.
17+
#
18+
# Hug API can be used to create an ad-hoc web interface to your
19+
# script. This can be useful if you are using software
20+
# that accepts a URL as data source and you don't want to use
21+
# this script to save a file locally.
1422
#
1523
# OPTIONS: ---
1624
#
1725
# REQUIREMENTS: - python3
1826
# - libhxl (@see https://pypi.org/project/libhxl/)
27+
# - hug (https://github.com/hugapi/hug/)
1928
# - your-extra-python-lib-here
2029
# - your-non-python-dependency-here
2130
# BUGS: ---
@@ -33,11 +42,19 @@ import os
3342
import logging
3443
import argparse
3544

45+
# @see https://github.com/HXLStandard/libhxl-python
46+
# pip3 install libhxl --upgrade
3647
# Do not import hxl, to avoid circular imports
3748
import hxl.converters
3849
import hxl.filters
3950
import hxl.io
4051

52+
import tempfile
53+
54+
# @see https://github.com/hugapi/hug
55+
# pip3 install hug --upgrade
56+
import hug
57+
4158
# In Python2, sys.stdin is a byte stream; in Python3, it's a text stream
4259
STDIN = sys.stdin.buffer
4360

@@ -72,10 +89,10 @@ class HXL2Example:
7289
self.args = parser.parse_args()
7390
return self.args
7491

75-
def do_example_output(self, args,
76-
stdin=STDIN, stdout=sys.stdout, stderr=sys.stderr):
92+
def execute_cli(self, args,
93+
stdin=STDIN, stdout=sys.stdout, stderr=sys.stderr):
7794
"""
78-
The do_example_output is the main entrypoint of HXL2Example. When
95+
The execute_cli is the main entrypoint of HXL2Example. When
7996
called will convert the HXL source to example format.
8097
"""
8198

@@ -88,6 +105,49 @@ class HXL2Example:
88105

89106
return self.EXIT_OK
90107

108+
def execute_web(self, source_url, stdin=STDIN, stdout=sys.stdout,
                stderr=sys.stderr, hxlmeta=False):
    """
    The execute_web is the main entrypoint of HXL2Example when this class
    is called outside the command line interface, like the built-in HTTP
    use with hug.

    Args:
        source_url: location of the HXLated dataset. It is passed as the
            "infile" attribute of the argument object handed to
            HXLUtils.make_source().
        stdin: input stream forwarded to HXLUtils.make_source().
        stdout: currently unused by this method.
        stderr: currently unused by this method.
        hxlmeta: currently unused by this method.

    Returns:
        The text produced by source.gen_csv(True, True), read back from
        the temporary staging file as a single string.

    NOTE(review): make_source() currently opens its input with
    allow_local=True, so a web caller may be able to point source_url at
    a local path. Confirm before exposing this endpoint publicly.
    """

    # TODO: the execute_web needs to output the tabfile with correct
    #       mimetype, compression, etc
    #       (fititnt, 2021-02-07 15:59 UTC)

    self.hxlhelper = HXLUtils()

    # Minimal stand-in for the argparse namespace that make_source() reads.
    webargs = type('obj', (object,), {
        "infile": source_url,
        "sheet_index": None,
        "selector": None,
        'sheet': None,
        'http_header': None,
        'ignore_certs': False
    })

    # One read/write handle replaces the former write-only handle plus a
    # second open() by name that was never closed. The context manager
    # removes the temporary file on every exit path. Staging through a
    # text-mode file is kept so the returned text is the same as before.
    with tempfile.NamedTemporaryFile('w+') as temp_input:
        with self.hxlhelper.make_source(webargs, stdin) as source:
            for line in source.gen_csv(True, True):
                temp_input.write(line)

        # Rewind (this also flushes pending writes) before reading back.
        temp_input.seek(0)
        return temp_input.read()
150+
91151

92152
class HXLUtils:
93153
"""
@@ -224,7 +284,7 @@ class HXLUtils:
224284
url_or_filename or stdin,
225285
sheet_index=sheet_index,
226286
selector=selector,
227-
allow_local=True,
287+
allow_local=True, # TODO: consider change this for execute_web
228288
http_headers=http_headers,
229289
verify_ssl=(not args.ignore_certs)
230290
)
@@ -297,4 +357,30 @@ if __name__ == "__main__":
297357
hxl2example = HXL2Example()
298358
args = hxl2example.make_args_hxl2example()
299359

300-
hxl2example.do_example_output(args)
360+
hxl2example.execute_cli(args)
361+
362+
363+
@hug.format.content_type('text/csv')
def output_csv(data, response):
    """Hug output formatter that serves endpoint results as text/csv.

    Args:
        data: value returned by the endpoint handler.
        response: response object whose content_type is set here.

    Returns:
        - The hug JSON rendering of data (content type switched to
          application/json) when data is a dict containing an 'errors' key.
        - data itself, untouched, when it exposes a read attribute.
        - Otherwise str(data) encoded as UTF-8 bytes.
    """
    is_error_payload = isinstance(data, dict) and 'errors' in data
    if is_error_payload:
        response.content_type = 'application/json'
        return hug.output_format.json(data)

    response.content_type = 'text/csv'
    # File-like results are handed back as-is; anything else is serialized.
    return data if hasattr(data, "read") else str(data).encode("utf8")
373+
374+
375+
@hug.get('/hxl2example.csv', output=output_csv)
def api_hxl2tab(source_url):
    """hxl2example (@see https://github.com/EticaAI/HXL-Data-Science-file-formats)

    Example:
    http://localhost:8000/hxl2example.csv?source_url=https://docs.google.com/spreadsheets/u/1/d/1l7POf1WPfzgJb-ks4JM86akFSvaZOhAUWqafSJsm3Y4/edit#gid=634938833

    """
    # The example URL above must match the route registered by the
    # decorator; it previously pointed at the hxl2tab endpoint.
    hxl2example = HXL2Example()

    return hxl2example.execute_web(source_url)

bin/hxl2tab

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ class HXL2Tab:
172172
temp_input = tempfile.NamedTemporaryFile('w')
173173
temp_output = tempfile.NamedTemporaryFile('w')
174174

175+
# TODO: implement other options beyond source_url
175176
webargs = type('obj', (object,), {
176177
"infile": source_url,
177178
"sheet_index": None,

tests/manual-tests.sh

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,22 @@ sudo snap install ngrok
1010

1111
#### The tests _________________________________________________________________
1212

13+
### hxl2example ----------------------------------------------------------------
14+
15+
hxl2example tests/files/iris_hxlated-csv.csv | head
16+
hxl2example tests/files/iris_hxlated-csv.csv temp/iris.tab
17+
hxl2example https://docs.google.com/spreadsheets/u/1/d/1l7POf1WPfzgJb-ks4JM86akFSvaZOhAUWqafSJsm3Y4/edit#gid=634938833 | head
18+
hxl2example https://docs.google.com/spreadsheets/u/1/d/1l7POf1WPfzgJb-ks4JM86akFSvaZOhAUWqafSJsm3Y4/edit#gid=634938833 data-mining-projects/output/HXL-CPLP-Exemplar_iris.tab
19+
20+
## hug -f bin/hxl2example ......................................................
21+
#@see https://hugapi.github.io/hug/
22+
#@see https://github.com/hugapi/hug/
23+
hug -f bin/hxl2example
24+
25+
curl --silent http://localhost:8000/hxl2example.csv?source_url=https://docs.google.com/spreadsheets/u/1/d/1l7POf1WPfzgJb-ks4JM86akFSvaZOhAUWqafSJsm3Y4/edit#gid=634938833 | head
26+
# HXLStandard_HXLCoreSchema_CoreHashtags
27+
curl --silent http://localhost:8000/hxl2example.csv?source_url=https://docs.google.com/spreadsheets/d/1En9FlmM8PrbTWgl3UHPF_MXnJ6ziVZFhBbojSJzBdLI/edit#gid=319251406 | head
28+
1329
### hxl2tab --------------------------------------------------------------------
1430

1531
hxl2tab tests/files/iris_hxlated-csv.csv temp/iris.tab
@@ -22,7 +38,7 @@ hxl2tab https://docs.google.com/spreadsheets/d/1Vqv6-EAdSHMSZvZtE426aXkDiwP8Mdrp
2238
hxl2tab https://docs.google.com/spreadsheets/d/1Vqv6-EAdSHMSZvZtE426aXkDiwP8Mdrpft3tiGQ1RH0/edit#gid=0 temp/example-ebola-dataset-1_HXLated+tab_hxltabv15_b.tab
2339
hxl2tab https://docs.google.com/spreadsheets/d/1Vqv6-EAdSHMSZvZtE426aXkDiwP8Mdrpft3tiGQ1RH0/edit#gid=0 temp/example-ebola-dataset-1_HXLated+tab_hxltabv16.tab
2440

25-
### hug -f bin/hxl2tab ---------------------------------------------------------
41+
## hug -f bin/hxl2tab ..........................................................
2642
#@see https://hugapi.github.io/hug/
2743
#@see https://github.com/hugapi/hug/
2844
hug -f bin/hxl2tab

0 commit comments

Comments
 (0)