Skip to content

Commit

Permalink
Merge pull request #241 from JohnSnowLabs/release/513/missing-metadat…
Browse files Browse the repository at this point in the history
…a-handling

missing metadata handling
  • Loading branch information
C-K-Loan authored Jan 21, 2024
2 parents 23b1210 + 342544d commit e7c3ffa
Showing 1 changed file with 3 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def extract_sparknlp_metadata(row: pd.Series, configs: SparkNLPExtractorConfig)
result = dict(
zip(map(lambda x: 'meta_' + configs.output_col_prefix + '_' + x, keys_in_metadata), metadata_scalars))
return result
extract_val_from_dic_list_to_list = lambda key: lambda x, y: x + [y[key]]
extract_val_from_dic_list_to_list = lambda key: lambda x, y: x + [y[key]] if key in y else x + [None]
# List of lambda expressions, one for each key to be extracted. (TODO blacklisting?)
dict_value_extractors = list(map(extract_val_from_dic_list_to_list, keys_in_metadata))
# Reduce a list of dicts with the same structure and a common key to a list of values for that key. Leveraging closure for meta_dict_list
Expand Down Expand Up @@ -287,7 +287,6 @@ def extract_master(row: pd.Series, configs: SparkNLPExtractorConfig) -> pd.Serie
else:
base_annos = extract_base_sparknlp_features(row, configs)

# TODO proper finisher handling!
# Get Metadata
all_metas = extract_sparknlp_metadata(row, configs) if configs.get_meta or configs.get_full_meta else {}

Expand All @@ -306,6 +305,8 @@ def extract_master(row: pd.Series, configs: SparkNLPExtractorConfig) -> pd.Serie
})




def apply_extractors_and_merge(df, anno_2_ex_config, keep_stranger_features, stranger_features):
""" apply extract_master on all fields with corrosponding configs after converting Pyspark Rows to List[Dict]
and merge them to a final DF (1 to 1 mapping still)
Expand Down

0 comments on commit e7c3ffa

Please sign in to comment.