initial commit

hashangit · Mar 11, 2024 · 9ea33a4 · 9ea33a4
1 parent 3f7c8e3
commit 9ea33a4
Show file tree

Hide file tree

Showing 5 changed files with 184 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -0,0 +1,97 @@
+# Keywads Library
+
+## About
+
+The Keywads Library is a Python package designed to simplify the process of analyzing keyword data from Excel files. It includes tools for transforming source files into a standardized format and analyzing keyword metrics to rank keywords based on competition and search volume.
+
+## Installation
+
+To use this library, clone or download it into your project directory or a location in your Python path.
+
+```bash
+git clone https://github.com/hashangit/keywads.git
+```
+
+## Prerequisites
+
+Ensure you have Python installed, along with `pandas` and `openpyxl` (the engine `pandas` uses to read and write `.xlsx` files):
+
+```bash
+pip install pandas openpyxl
+```
+
+## How to Use
+
+### Structure Your Application Folder
+
+We recommend structuring your application folder as follows for ease of use with default settings:
+
+```
+your_project/
+│
+├── keywads/
+│   ├── __init__.py
+│   ├── source_file_transformer.py
+│   └── keyword_analyzer.py
+│
+├── data/
+│   └── your_source_file.xlsx
+│
+├── output/
+│
+└── your_script.py
+```
+
+### Example Usage
+
+1. **Transforming a Source File**: First, ensure your source Excel file is in the `data` folder. Then use `SourceFileTransformer` to standardize the format.
+
+2. **Analyzing Keywords**: After transforming the source file, use `KeywordAnalyzer` to analyze and rank keywords, saving the results in the `output` folder.
+
+Here's an example snippet you can put in `your_script.py`:
+
+```python
+from keywads import SourceFileTransformer, KeywordAnalyzer
+
+# Transform the source file
+transformer = SourceFileTransformer(source_file_path='data/your_source_file.xlsx')
+transformer.load_and_transform_source()
+transformer.save_transformed_file()
+
+# Analyze keywords
+analyzer = KeywordAnalyzer()
+analyzer.run_analysis()
+```
+
+This will process `your_source_file.xlsx` from the `data` folder, standardize it, and then analyze the keywords, saving the results as `top_200_keywords_adjusted.xlsx` in the `output` folder.
+
+### Custom Paths
+
+You can also specify custom paths for the source file and the transformed file, and a custom file name for the analysis results, if your project structure differs from the recommended setup:
+
+```python
+# Specify custom paths
+custom_source_path = 'path/to/your/custom_source_file.xlsx'
+custom_output_path = 'path/to/your/custom_output_directory/'
+
+# Initialize the transformer with a custom path
+transformer = SourceFileTransformer(source_file_path=custom_source_path)
+transformer.load_and_transform_source()
+transformer.save_transformed_file(output_path=custom_output_path + 'Keyword_Stat.xlsx')
+
+# Initialize the analyzer with a custom input path; the results file gets a custom name and is written to the `output` folder
+analyzer = KeywordAnalyzer(file_path=custom_output_path + 'Keyword_Stat.xlsx')
+analyzer.run_analysis(output_file_name='Custom_Top_200_Keywords.xlsx')
+```
+
+### Note
+
+Remember to replace `your_source_file.xlsx`, `path/to/your/custom_source_file.xlsx`, and `path/to/your/custom_output_directory/` with the actual paths to your files and directories.
+
+## Support
+
+For support, please open an issue in the GitHub repository at https://github.com/hashangit/keywads.
+
+## License
+
+[MIT License](LICENSE) - see the LICENSE file for details.
diff --git a/__init__.py b/__init__.py
@@ -0,0 +1,3 @@
+# keywads/__init__.py
+from .source_file_transformer import SourceFileTransformer
+from .keyword_analyzer import KeywordAnalyzer
diff --git a/data/Keyword_Stat.xlsx b/data/Keyword_Stat.xlsx
diff --git a/keyword_analyzer.py b/keyword_analyzer.py
@@ -0,0 +1,45 @@
+# keyword_analyzer.py
+import pandas as pd
+import os
+
+class KeywordAnalyzer:
+    """Rank keywords from a keyword-statistics Excel sheet.
+
+    The loaded sheet must provide the columns read by this class:
+    'Avg. monthly searches', 'Competition (indexed value)',
+    'Top of page bid (low range)' and 'Top of page bid (high range)'.
+    """
+
+    def __init__(self, file_path=None):
+        """Remember which Excel file to analyze.
+
+        Args:
+            file_path: Path of the Excel file to load. When omitted,
+                'data/Keyword_Stat.xlsx' (relative to the working
+                directory) is used.
+        """
+        if file_path is None:
+            file_path = os.path.join('data', 'Keyword_Stat.xlsx')
+        self.file_path = file_path
+        # Filled in by load_data(); stays None until a load succeeds.
+        self.df = None
+
+    def load_data(self):
+        """Read ``self.file_path`` into ``self.df``.
+
+        A missing file is reported on stdout instead of raising, and
+        ``self.df`` is left untouched in that case.
+        """
+        try:
+            self.df = pd.read_excel(self.file_path)
+            print("Data loaded successfully.")
+        except FileNotFoundError:
+            print(f"File not found at {self.file_path}. Please check the file path.")
+
+    def preprocess_data(self):
+        """Add the 'Average Bid' column and fill missing competition values with 0."""
+        low_bid = self.df['Top of page bid (low range)']
+        high_bid = self.df['Top of page bid (high range)']
+        self.df['Average Bid'] = (low_bid + high_bid) / 2
+        self.df['Competition (indexed value)'] = self.df['Competition (indexed value)'].fillna(0)
+
+    def calculate_ranks(self):
+        """Add the 'Rank' and 'Adjusted Rank' columns to ``self.df``.
+
+        Rank          = searches / (1 + competition)
+        Adjusted Rank = searches / ((1 + competition) * average bid)
+
+        Rows whose average bid is exactly 0 use the plain rank as their
+        adjusted rank. Requires preprocess_data() to have been run so that
+        'Average Bid' exists.
+        """
+        searches = self.df['Avg. monthly searches']
+        competition_factor = 1 + self.df['Competition (indexed value)']
+        average_bid = self.df['Average Bid']
+
+        rank = searches / competition_factor
+        self.df['Rank'] = rank
+
+        # Dividing by a zero bid yields inf/NaN; replace exactly those rows
+        # with the plain rank. Done column-wise so it also works on an
+        # empty frame and avoids a Python-level loop over rows.
+        bid_adjusted = searches / (competition_factor * average_bid)
+        self.df['Adjusted Rank'] = bid_adjusted.where(average_bid != 0, rank)
+
+    def sort_and_extract_top_keywords(self, top_n=200):
+        """Return the ``top_n`` rows with the highest 'Adjusted Rank'.
+
+        Args:
+            top_n: Maximum number of rows to return.
+
+        Returns:
+            A new DataFrame sorted by 'Adjusted Rank' in descending order.
+        """
+        return self.df.sort_values(by='Adjusted Rank', ascending=False).head(top_n)
+
+    def save_to_excel(self, data, output_file_name='top_200_keywords_adjusted.xlsx'):
+        """Write ``data`` to ``output/<output_file_name>`` without the index.
+
+        Args:
+            data: DataFrame to save.
+            output_file_name: File name placed under the 'output' folder;
+                missing parent directories are created.
+        """
+        output_path = os.path.join('output', output_file_name)
+        os.makedirs(os.path.dirname(output_path), exist_ok=True)
+        data.to_excel(output_path, index=False)
+        print(f"The adjusted list of top keywords has been saved to {output_path}")
+
+    def run_analysis(self, output_file_name='top_200_keywords_adjusted.xlsx', top_n=200):
+        """Load, preprocess, rank and save the top keywords in one call.
+
+        Nothing is written when the data could not be loaded.
+
+        Args:
+            output_file_name: File name for the results under 'output'.
+            top_n: Number of top-ranked keywords to keep.
+        """
+        self.load_data()
+        if self.df is None:
+            return
+        self.preprocess_data()
+        self.calculate_ranks()
+        top_keywords = self.sort_and_extract_top_keywords(top_n=top_n)
+        # save_to_excel expects (data, file name) and prepends 'output' itself,
+        # so pass the bare file name rather than a pre-joined path.
+        self.save_to_excel(top_keywords, output_file_name)
diff --git a/source_file_transformer.py b/source_file_transformer.py
@@ -0,0 +1,39 @@
+# source_file_transformer.py
+import pandas as pd
+import os
+
+class SourceFileTransformer:
+    """Reduce a source keyword Excel export to a fixed set of typed columns."""
+
+    # Columns kept from the source sheet, in output order, mapped to the
+    # dtype each one is cast to.
+    _COLUMN_DTYPES = {
+        'Keyword': 'object',
+        'Avg. monthly searches': 'int64',
+        'Competition': 'object',
+        'Competition (indexed value)': 'float64',
+        'Top of page bid (low range)': 'float64',
+        'Top of page bid (high range)': 'float64',
+        'Ad impression share': 'object',
+    }
+
+    def __init__(self, source_file_path=''):
+        """Remember which source file to transform.
+
+        Args:
+            source_file_path: Path of the source Excel file.
+        """
+        self.source_file_path = source_file_path
+        # Set by load_and_transform_source(); None until a transform succeeds.
+        self.transformed_df = None
+
+    def load_and_transform_source(self):
+        """Load the source file and store the standardized frame.
+
+        The first two rows of the sheet are skipped, only the columns in
+        ``_COLUMN_DTYPES`` are kept, and each is cast to its listed dtype.
+        Any failure is reported on stdout instead of raising; in that case
+        ``self.transformed_df`` is reset to None so a partially converted
+        frame is never left behind.
+        """
+        try:
+            source_df = pd.read_excel(self.source_file_path, skiprows=2)
+            selected = source_df[list(self._COLUMN_DTYPES)]
+            transformed = selected.astype(self._COLUMN_DTYPES)
+        except Exception as e:
+            self.transformed_df = None
+            print(f"An error occurred during file loading or transformation: {e}")
+        else:
+            # Publish the result only once every step has succeeded.
+            self.transformed_df = transformed
+            print("Transformation successful.")
+
+    def save_transformed_file(self, output_path=None):
+        """Write the transformed frame to an Excel file without the index.
+
+        Args:
+            output_path: Destination file. Defaults to
+                'data/Keyword_Stat.xlsx' relative to the working directory.
+
+        Raises:
+            ValueError: If no transformed data is available, i.e.
+                load_and_transform_source() was not called or did not succeed.
+        """
+        if self.transformed_df is None:
+            raise ValueError(
+                "No transformed data to save; call load_and_transform_source() "
+                "successfully first."
+            )
+        if output_path is None:
+            output_path = os.path.join('data', 'Keyword_Stat.xlsx')
+        # A bare file name has an empty dirname, and os.makedirs('') raises.
+        output_dir = os.path.dirname(output_path)
+        if output_dir:
+            os.makedirs(output_dir, exist_ok=True)
+        self.transformed_df.to_excel(output_path, index=False)
+        print(f"The transformed file has been saved to {output_path}")
+