Skip to content
This repository has been archived by the owner on Nov 15, 2021. It is now read-only.

Commit

Permalink
#26 parse function explanation added
Browse files Browse the repository at this point in the history
  • Loading branch information
ArtemisDicoTiar committed Nov 7, 2021
1 parent 108f777 commit 4345fdc
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions storyteller/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,18 @@ def parse(df: pd.DataFrame) -> pd.DataFrame:
"""

df['eg'] = df['eg'].apply(
# replace each 'eg' cell with a list of its examples (the highlighted proverb is converted to [WISDOM])
lambda r: list(map(
# while iterating list of 'hits'
# convert <em> ... lalib ... </em> to [WISDOM]
lambda hit: re.sub(r"<em>.*</em>", "[WISDOM]", hit['highlight']['sents'][0]),
# loading json to dict -> taking dict['hits']['hits']
json.loads(r)['hits']['hits']
))
)

# each 'eg' cell now contains a list object
# -> explode it so that every list element gets its own row
df = df.explode('eg')

return df
Expand Down

0 comments on commit 4345fdc

Please sign in to comment.