-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
184 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
# Keywads Library | ||
|
||
## About | ||
|
||
The Keywads Library is a Python package designed to simplify the process of analyzing keyword data from Excel files. It includes tools for transforming source files into a standardized format and analyzing keyword metrics to rank keywords based on competition and search volume. | ||
|
||
## Installation | ||
|
||
To use this library, clone or download it into your project directory or a location in your Python path. | ||
|
||
```bash | ||
git clone https://github.com/hashangit/keywads.git | ||
``` | ||
|
||
## Prerequisites | ||
|
||
Ensure you have Python, `pandas`, and `openpyxl` (the engine pandas uses to read and write `.xlsx` files) installed in your environment: | ||
|
||
```bash | ||
pip install pandas openpyxl | ||
``` | ||
|
||
## How to Use | ||
|
||
### Structure Your Application Folder | ||
|
||
We recommend structuring your application folder as follows for ease of use with default settings: | ||
|
||
``` | ||
your_project/ | ||
│ | ||
├── keywads/ | ||
│ ├── __init__.py | ||
│ ├── source_file_transformer.py | ||
│ └── keyword_analyzer.py | ||
│ | ||
├── data/ | ||
│ └── your_source_file.xlsx | ||
│ | ||
├── output/ | ||
│ | ||
└── your_script.py | ||
``` | ||
|
||
### Example Usage | ||
|
||
1. **Transforming a Source File**: First, ensure your source Excel file is in the `data` folder. Then use `SourceFileTransformer` to standardize the format. | ||
|
||
2. **Analyzing Keywords**: After transforming the source file, use `KeywordAnalyzer` to analyze and rank keywords, saving the results in the `output` folder. | ||
|
||
Here's an example snippet you can put in `your_script.py`: | ||
|
||
```python | ||
from keywads import SourceFileTransformer, KeywordAnalyzer | ||
|
||
# Transform the source file | ||
transformer = SourceFileTransformer(source_file_path='data/your_source_file.xlsx') | ||
transformer.load_and_transform_source() | ||
transformer.save_transformed_file() | ||
|
||
# Analyze keywords | ||
analyzer = KeywordAnalyzer() | ||
analyzer.run_analysis() | ||
``` | ||
|
||
This will process `your_source_file.xlsx` from the `data` folder, standardize it, and then analyze the keywords, saving the results as `top_200_keywords_adjusted.xlsx` in the `output` folder. | ||
|
||
### Custom Paths | ||
|
||
You can also specify custom paths for both the source and output files if your project structure differs from the recommended setup: | ||
|
||
```python | ||
# Specify custom paths | ||
custom_source_path = 'path/to/your/custom_source_file.xlsx' | ||
custom_output_path = 'path/to/your/custom_output_directory/' | ||
|
||
# Initialize the transformer with a custom path | ||
transformer = SourceFileTransformer(source_file_path=custom_source_path) | ||
transformer.load_and_transform_source() | ||
transformer.save_transformed_file(output_path=custom_output_path + 'Keyword_Stat.xlsx') | ||
|
||
# Initialize the analyzer with a custom input path and a custom output file name (results are written to the `output` folder) | ||
analyzer = KeywordAnalyzer(file_path=custom_output_path + 'Keyword_Stat.xlsx') | ||
analyzer.run_analysis(output_file_name='Custom_Top_200_Keywords.xlsx') | ||
``` | ||
|
||
### Note | ||
|
||
Remember to replace `your_source_file.xlsx`, `path/to/your/custom_source_file.xlsx`, and `path/to/your/custom_output_directory/` with the actual paths to your files and directories. | ||
|
||
## Support | ||
|
||
For support, please open an issue in the GitHub repository at https://github.com/hashangit/keywads. | ||
|
||
## License | ||
|
||
[MIT License](LICENSE) - see the LICENSE file for details. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# keywads/__init__.py | ||
from .source_file_transformer import SourceFileTransformer | ||
from .keyword_analyzer import KeywordAnalyzer |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# keyword_analyzer.py | ||
import pandas as pd | ||
import os | ||
|
||
class KeywordAnalyzer:
    """Rank keywords from a keyword-statistics Excel workbook.

    The methods read these columns from the loaded data:
    'Avg. monthly searches', 'Competition (indexed value)',
    'Top of page bid (low range)' and 'Top of page bid (high range)'.
    """

    def __init__(self, file_path=None):
        """Store the workbook path without reading it.

        Args:
            file_path: Path of the Excel file to analyze. Defaults to
                ``data/Keyword_Stat.xlsx`` relative to the working directory.
        """
        if file_path is None:
            file_path = os.path.join('data', 'Keyword_Stat.xlsx')
        self.file_path = file_path
        # Populated by load_data(); stays None until a load succeeds.
        self.df = None

    def load_data(self):
        """Read the workbook into ``self.df``.

        A missing file is reported on stdout and leaves ``self.df``
        unchanged; any other read error propagates to the caller.
        """
        try:
            self.df = pd.read_excel(self.file_path)
            print("Data loaded successfully.")
        except FileNotFoundError:
            print(f"File not found at {self.file_path}. Please check the file path.")

    def preprocess_data(self):
        """Add the 'Average Bid' column and fill missing competition with 0."""
        low_bid = self.df['Top of page bid (low range)']
        high_bid = self.df['Top of page bid (high range)']
        self.df['Average Bid'] = (low_bid + high_bid) / 2
        self.df['Competition (indexed value)'] = self.df['Competition (indexed value)'].fillna(0)

    def calculate_ranks(self):
        """Add the 'Rank' and 'Adjusted Rank' columns.

        'Rank' is searches / (1 + competition). 'Adjusted Rank' additionally
        divides by the average bid, except for rows whose average bid is
        exactly 0, which fall back to the plain rank instead of dividing by
        zero.
        """
        searches = self.df['Avg. monthly searches']
        competition_factor = 1 + self.df['Competition (indexed value)']
        average_bid = self.df['Average Bid']

        base_rank = searches / competition_factor
        self.df['Rank'] = base_rank

        bid_adjusted = searches / (competition_factor * average_bid)
        # Vectorized fallback: keep the bid-adjusted value wherever the bid
        # is non-zero, otherwise use the plain rank. Unlike a row-wise apply,
        # this also works on an empty frame.
        self.df['Adjusted Rank'] = bid_adjusted.where(average_bid != 0, base_rank)

    def sort_and_extract_top_keywords(self, top_n=200):
        """Return the ``top_n`` rows with the highest 'Adjusted Rank'."""
        return self.df.sort_values(by='Adjusted Rank', ascending=False).head(top_n)

    def save_to_excel(self, data, output_file_name='top_200_keywords_adjusted.xlsx'):
        """Write ``data`` to ``output/<output_file_name>``.

        Args:
            data: DataFrame to write (without its index).
            output_file_name: File name created inside the ``output`` folder,
                which is created if it does not exist.
        """
        output_path = os.path.join('output', output_file_name)
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        data.to_excel(output_path, index=False)
        print(f"The adjusted list of top keywords has been saved to {output_path}")

    def run_analysis(self, output_file_name='top_200_keywords_adjusted.xlsx', top_n=200):
        """Load, rank and save the top keywords in one call.

        Nothing is computed or written when loading fails (``self.df`` is
        still None afterwards).

        Args:
            output_file_name: File name for the results inside ``output``.
            top_n: Number of highest-ranked keywords to keep.
        """
        self.load_data()
        if self.df is None:
            return
        self.preprocess_data()
        self.calculate_ranks()
        top_keywords = self.sort_and_extract_top_keywords(top_n=top_n)
        # save_to_excel() prefixes the 'output' folder itself, so pass the
        # bare file name, and pass the data first to match its signature.
        self.save_to_excel(top_keywords, output_file_name=output_file_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# source_file_transformer.py | ||
import pandas as pd | ||
import os | ||
|
||
class SourceFileTransformer:
    """Reduce a source keyword workbook to a fixed set of typed columns."""

    def __init__(self, source_file_path=''):
        """Store the source path without reading it.

        Args:
            source_file_path: Path of the Excel file to transform.
        """
        self.source_file_path = source_file_path
        # Set by load_and_transform_source(); None until a load succeeds.
        self.transformed_df = None

    def load_and_transform_source(self):
        """Read the source workbook and keep only the standard columns.

        The first two rows of the sheet are skipped before the header is
        read. Any failure (missing file, missing column, failed dtype cast)
        is reported on stdout rather than raised. If only the dtype cast
        fails, ``self.transformed_df`` keeps the un-cast column subset.
        """
        try:
            source_df = pd.read_excel(self.source_file_path, skiprows=2)
            columns_to_keep = [
                'Keyword', 'Avg. monthly searches', 'Competition',
                'Competition (indexed value)', 'Top of page bid (low range)',
                'Top of page bid (high range)', 'Ad impression share',
            ]
            self.transformed_df = source_df[columns_to_keep]
            data_types = {
                'Keyword': 'object',
                'Avg. monthly searches': 'int64',
                'Competition': 'object',
                'Competition (indexed value)': 'float64',
                'Top of page bid (low range)': 'float64',
                'Top of page bid (high range)': 'float64',
                'Ad impression share': 'object',
            }
            self.transformed_df = self.transformed_df.astype(data_types)
            print("Transformation successful.")
        except Exception as e:
            print(f"An error occurred during file loading or transformation: {e}")

    def save_transformed_file(self, output_path=None):
        """Write the transformed data to an Excel file.

        Args:
            output_path: Destination file. Defaults to
                ``data/Keyword_Stat.xlsx`` relative to the working directory.
                Missing parent directories are created.
        """
        if self.transformed_df is None:
            # Loading failed or was never run; report it the same way the
            # loader reports errors instead of crashing on None.to_excel().
            print("No transformed data to save. Run load_and_transform_source() first.")
            return
        if output_path is None:
            output_path = os.path.join('data', 'Keyword_Stat.xlsx')
        output_dir = os.path.dirname(output_path)
        # A bare file name has an empty dirname, and os.makedirs('') raises.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        self.transformed_df.to_excel(output_path, index=False)
        print(f"The transformed file has been saved to {output_path}")
|