Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
hashangit committed Mar 11, 2024
1 parent 3f7c8e3 commit 9ea33a4
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 0 deletions.
97 changes: 97 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Keywads Library

## About

The Keywads Library is a Python package designed to simplify the process of analyzing keyword data from Excel files. It includes tools for transforming source files into a standardized format and analyzing keyword metrics to rank keywords based on competition and search volume.

## Installation

To use this library, clone or download it into your project directory or a location in your Python path.

```bash
git clone https://github.com/hashangit/keywads.git
```

## Prerequisites

Ensure you have Python, `pandas`, and `openpyxl` (the engine pandas uses to read and write `.xlsx` files) installed in your environment:

```bash
pip install pandas openpyxl
```

## How to Use

### Structure Your Application Folder

We recommend structuring your application folder as follows for ease of use with default settings:

```
your_project/
├── keywads/
│ ├── __init__.py
│ ├── source_file_transformer.py
│ └── keyword_analyzer.py
├── data/
│ └── your_source_file.xlsx
├── output/
└── your_script.py
```

### Example Usage

1. **Transforming a Source File**: First, ensure your source Excel file is in the `data` folder. Then use `SourceFileTransformer` to standardize the format.

2. **Analyzing Keywords**: After transforming the source file, use `KeywordAnalyzer` to analyze and rank keywords, saving the results in the `output` folder.

Here's an example snippet you can put in `your_script.py`:

```python
from keywads import SourceFileTransformer, KeywordAnalyzer

# Transform the source file
transformer = SourceFileTransformer(source_file_path='data/your_source_file.xlsx')
transformer.load_and_transform_source()
transformer.save_transformed_file()

# Analyze keywords
analyzer = KeywordAnalyzer()
analyzer.run_analysis()
```

This will process `your_source_file.xlsx` from the `data` folder, standardize it, and then analyze the keywords, saving the results as `top_200_keywords_adjusted.xlsx` in the `output` folder.

### Custom Paths

You can also specify custom paths for both the source and output files if your project structure differs from the recommended setup:

```python
# Specify custom paths
custom_source_path = 'path/to/your/custom_source_file.xlsx'
custom_output_path = 'path/to/your/custom_output_directory/'

# Initialize the transformer with a custom path
transformer = SourceFileTransformer(source_file_path=custom_source_path)
transformer.load_and_transform_source()
transformer.save_transformed_file(output_path=custom_output_path + 'Keyword_Stat.xlsx')

# Initialize the analyzer with a custom input path and a custom output file name
# (the results file is still written to the `output` folder)
analyzer = KeywordAnalyzer(file_path=custom_output_path + 'Keyword_Stat.xlsx')
analyzer.run_analysis(output_file_name='Custom_Top_200_Keywords.xlsx')
```

### Note

Remember to replace `your_source_file.xlsx`, `path/to/your/custom_source_file.xlsx`, and `path/to/your/custom_output_directory/` with the actual paths to your files and directories.

## Support

For support, please open an issue in the GitHub repository at https://github.com/hashangit/keywads.

## License

[MIT License](LICENSE) - see the LICENSE file for details.
3 changes: 3 additions & 0 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# keywads/__init__.py
from .source_file_transformer import SourceFileTransformer
from .keyword_analyzer import KeywordAnalyzer
Binary file added data/Keyword_Stat.xlsx
Binary file not shown.
45 changes: 45 additions & 0 deletions keyword_analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# keyword_analyzer.py
import pandas as pd
import os

class KeywordAnalyzer:
    """Rank keywords from a keyword-statistics Excel workbook.

    The workbook is expected to contain the columns
    'Avg. monthly searches', 'Competition (indexed value)',
    'Top of page bid (low range)' and 'Top of page bid (high range)'.
    """

    def __init__(self, file_path=None):
        """Remember which workbook to analyze.

        Args:
            file_path: Path of the Excel file to load. Defaults to
                'data/Keyword_Stat.xlsx' relative to the working directory.
        """
        if file_path is None:
            file_path = os.path.join('data', 'Keyword_Stat.xlsx')
        self.file_path = file_path
        # Populated by load_data(); stays None if the file cannot be found.
        self.df = None

    def load_data(self):
        """Read the workbook into ``self.df``.

        A missing file is reported on stdout and leaves ``self.df``
        unchanged instead of raising.
        """
        try:
            self.df = pd.read_excel(self.file_path)
            print("Data loaded successfully.")
        except FileNotFoundError:
            print(f"File not found at {self.file_path}. Please check the file path.")

    def preprocess_data(self):
        """Add the 'Average Bid' column and fill missing competition with 0."""
        low_bid = self.df['Top of page bid (low range)']
        high_bid = self.df['Top of page bid (high range)']
        self.df['Average Bid'] = (low_bid + high_bid) / 2
        self.df['Competition (indexed value)'] = self.df['Competition (indexed value)'].fillna(0)

    def calculate_ranks(self):
        """Add the 'Rank' and 'Adjusted Rank' columns.

        'Rank' is searches / (1 + competition). 'Adjusted Rank' further
        divides by the average bid; rows whose average bid is exactly 0
        fall back to the plain rank so they do not end up as inf/NaN.
        """
        searches = self.df['Avg. monthly searches']
        competition_factor = 1 + self.df['Competition (indexed value)']
        average_bid = self.df['Average Bid']

        self.df['Rank'] = searches / competition_factor
        bid_adjusted = searches / (competition_factor * average_bid)
        # Keep the bid-adjusted value wherever the bid is non-zero (a NaN bid
        # compares as "not equal to 0" and therefore stays NaN); use the
        # plain rank for zero-bid rows.
        self.df['Adjusted Rank'] = bid_adjusted.where(average_bid != 0, self.df['Rank'])

    def sort_and_extract_top_keywords(self, top_n=200):
        """Return the ``top_n`` rows with the highest 'Adjusted Rank'."""
        return self.df.sort_values(by='Adjusted Rank', ascending=False).head(top_n)

    def save_to_excel(self, data, output_file_name='top_200_keywords_adjusted.xlsx'):
        """Write ``data`` to ``output/<output_file_name>``.

        Args:
            data: DataFrame to write (without its index).
            output_file_name: File name created inside the 'output' folder;
                the folder is created when it does not exist.
        """
        output_path = os.path.join('output', output_file_name)
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        data.to_excel(output_path, index=False)
        print(f"The adjusted list of top keywords has been saved to {output_path}")

    def run_analysis(self, output_file_name='top_200_keywords_adjusted.xlsx', top_n=200):
        """Load, preprocess, rank and save the top keywords.

        Nothing is written when the input workbook could not be loaded.

        Args:
            output_file_name: File name for the results inside 'output'.
            top_n: Number of best-ranked keywords to keep.
        """
        self.load_data()
        if self.df is None:
            return
        self.preprocess_data()
        self.calculate_ranks()
        top_keywords = self.sort_and_extract_top_keywords(top_n=top_n)
        # save_to_excel takes (data, file name) and prefixes 'output' itself,
        # so pass the bare file name rather than an already-joined path.
        self.save_to_excel(top_keywords, output_file_name)
39 changes: 39 additions & 0 deletions source_file_transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# source_file_transformer.py
import pandas as pd
import os

class SourceFileTransformer:
    """Convert a raw keyword export workbook into the standardized layout."""

    def __init__(self, source_file_path=''):
        """Remember which source workbook to transform.

        Args:
            source_file_path: Path of the Excel file to read.
        """
        self.source_file_path = source_file_path
        # Set by load_and_transform_source(); None until a run succeeds.
        self.transformed_df = None

    def load_and_transform_source(self):
        """Load the source workbook and keep only the standardized columns.

        The first two rows of the sheet are skipped, the seven expected
        columns are selected and cast to fixed dtypes. Any failure is
        reported on stdout and leaves ``self.transformed_df`` as None, so a
        half-converted frame can never be saved by accident.
        """
        data_types = {
            'Keyword': 'object',
            'Avg. monthly searches': 'int64',
            'Competition': 'object',
            'Competition (indexed value)': 'float64',
            'Top of page bid (low range)': 'float64',
            'Top of page bid (high range)': 'float64',
            'Ad impression share': 'object'
        }
        # The columns to keep are exactly the ones that get a dtype, in order.
        columns_to_keep = list(data_types)

        # Drop any earlier result so a failed run does not expose stale data.
        self.transformed_df = None
        try:
            source_df = pd.read_excel(self.source_file_path, skiprows=2)
            transformed = source_df[columns_to_keep].astype(data_types)
        except Exception as e:
            print(f"An error occurred during file loading or transformation: {e}")
        else:
            # Publish the result only once every step has succeeded.
            self.transformed_df = transformed
            print("Transformation successful.")

    def save_transformed_file(self, output_path=None):
        """Write the transformed data to an Excel file.

        Args:
            output_path: Destination file. Defaults to
                'data/Keyword_Stat.xlsx'. Missing parent directories are
                created; a bare file name is written to the current
                working directory.
        """
        if self.transformed_df is None:
            print("No transformed data to save. Run load_and_transform_source() first.")
            return
        if output_path is None:
            output_path = os.path.join('data', 'Keyword_Stat.xlsx')
        parent_dir = os.path.dirname(output_path)
        # os.makedirs('') raises FileNotFoundError, so only create a parent
        # directory when the path actually has one.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        self.transformed_df.to_excel(output_path, index=False)
        print(f"The transformed file has been saved to {output_path}")

0 comments on commit 9ea33a4

Please sign in to comment.