Grow-with-Open-Source · iamwatchdogs · Aug 7, 2025 · Jul 22, 2025 · Jul 22, 2025 · Jul 22, 2025
diff --git a/Youtube_video_finder_using_geminillm/README.md b/Youtube_video_finder_using_geminillm/README.md
@@ -0,0 +1,65 @@
+"This is an open-source project initiated by me - you can fork, edit, and make a pull request."
+
+
+# YouTube Relevance Finder with Gemini AI
+
+This Python script searches YouTube for recent videos matching a user query and ranks them by relevance using the YouTube Data API and Google's Gemini AI model. It filters results by duration and recency, scores video titles for relevance, and prints the top-ranked videos.
+
+## 🔍 Features
+
+- Searches YouTube for videos from the past 14 days using the publicly available YouTube Data API
+- Filters videos by duration (4–20 minutes)
+- Uses Gemini AI to score title relevance to a query
+- Prints the top relevant video links with scores and metadata
+
+## 🛠️ Setup
+
+1. **Clone the repository**:
+   ```bash
+   git clone https://github.com/yourusername/your-repo-name.git
+   cd your-repo-name
+   ```
+
+2. **Install dependencies**:
+
+   ```bash
+   pip install google-api-python-client google-generativeai
+   ```
+
+3. **Set up environment variables**:
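+   Export the keys in your terminal (if you keep them in a `.env` file, load it into the environment yourself; the script only reads `os.environ`):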
+
+   ```bash
+   export YT_API_KEY=your_youtube_api_key
+   export GEMINI_API_KEY=your_gemini_api_key
+   ```
+
+## 🚀 Usage
+
+Run the script:
+
+```bash
+python app.py
+```
+
+You'll be prompted to enter a search query. The script will then display a list of the top relevant YouTube videos based on that query.
+
+## 📄 Example Output
+
+```
+1. 
+ • Title: Learn Python in 10 Minutes
+ • URL:   https://youtu.be/xyz123
+ • Score: 9.20
+ • Duration: 10m30s
+ • Published: 2025-05-01T12:34:56Z
+```
+
+## 📌 Notes
+
+* Make sure you have valid API keys for both YouTube Data API v3 and Google Gemini.
+* The script currently uses the `gemini-1.5-flash-latest` model.
+
+## 📃 License
+
+Open source – feel free to use and modify
diff --git a/Youtube_video_finder_using_geminillm/app.py b/Youtube_video_finder_using_geminillm/app.py
@@ -0,0 +1,180 @@
+import datetime
+import os
+import re
+
+import google.generativeai as genai
+from googleapiclient.discovery import build
+
+# ——— CONFIG ———
+# Initialize clients with environment variables
+yt = build("youtube", "v3", developerKey=os.environ["YT_API_KEY"])
+
+# Configure the Google Generative AI client
+genai.configure(api_key=os.environ["GEMINI_API_KEY"])
+
+# Initialize the Gemini model
+model = genai.GenerativeModel('gemini-1.5-flash-latest')
+
+
+def search_videos(query, max_filtered_results=20):
+    """
+    Search for YouTube videos matching a query, filtering by recency and duration.
+
+    This function keeps searching until it finds enough videos that meet the criteria
+    or exhausts the search results.
+    """
+    # Calculate publishedAfter timestamp (14 days ago)
+    fourteen_days_ago = (datetime.datetime.utcnow()
+                        - datetime.timedelta(days=14)).isoformat("T") + "Z"
+
+    filtered_videos = []
+    next_page_token = None
+    page_count = 0
+    max_pages = 5  # Limit the number of pages to search to avoid excessive API calls
+
+    # Continue searching until we have enough filtered videos or run out of results
+    while len(filtered_videos) < max_filtered_results and page_count < max_pages:
+        # Step 1: Search for videos matching the query
+        search_response = yt.search().list(
+            q=query,
+            part="id,snippet",
+            type="video",
+            order="relevance",
+            publishedAfter=fourteen_days_ago,
+            maxResults=50,  # Maximum allowed by the API
+            pageToken=next_page_token
+        ).execute()
+
+        page_count += 1
+
+        # Step 2: Collect video IDs from this page
+        video_ids = [item["id"]["videoId"] for item in search_response.get("items", [])]
+
+        # Break if no more videos found
+        if not video_ids:
+            break
+
+        # Step 3: Get details for the fetched videos
+        details = yt.videos().list(
+            part="contentDetails,snippet",
+            id=",".join(video_ids)
+        ).execute()
+
+        # Step 4: Filter by duration (4–20 minutes)
+        for item in details.get("items", []):
+            try:
+                # Parse duration (ISO 8601 format, e.g. "PT5M30S")
+                dur = item["contentDetails"]["duration"].replace("PT","")
+
+                # Skip videos with hours or without minutes
+                if "H" in dur or "M" not in dur:
+                    continue
+
+                # Split minutes and seconds
+                parts = dur.split("M")
+                mins = int(parts[0])
+                secs = parts[1].replace("S","") if len(parts) > 1 else "0"
+                seconds = int(secs) if secs else 0
+
+                total_seconds = mins * 60 + seconds
+
+                # Filter by duration (4 to 20 minutes inclusive)
+                if 4 * 60 <= total_seconds <= 20 * 60:
+                    filtered_videos.append({
+                        "id": item["id"],
+                        "title": item["snippet"]["title"],
+                        "duration": total_seconds,
+                        "publishedAt": item["snippet"]["publishedAt"]
+                    })
+
+                    # If we've found enough videos, we can stop
+                    if len(filtered_videos) >= max_filtered_results:
+                        break
+            except Exception as e:
+                print(f"Could not parse duration for video {item.get('id', 'N/A')}: {e}")
+                continue
+
+        # Check if there are more pages of results
+        next_page_token = search_response.get("nextPageToken")
+        if not next_page_token:
+            break
+
+        print(f"Found {len(filtered_videos)} qualifying videos so far. Searching next page...")
+
+    print(f"Search completed. Found {len(filtered_videos)} videos meeting criteria.")
+    return filtered_videos
+
+
+def score_title(title, query):
+    """Score a video title's relevance to the query using Gemini AI."""
+    prompt = (
+        f"Query: {query}\n"
+        f"Title: {title}\n"
+        "Rate relevance & quality 1–10 (just give the number)."
+    )
+    try:
+        response = model.generate_content(prompt)
+        score_text = response.text.strip()
+        # Try to extract just the number if there's additional text
+        import re
+        match = re.search(r'\b([0-9]|10)(\.[0-9]+)?\b', score_text)
+        if match:
+            score = float(match.group(0))
+        else:
+            score = float(score_text)
+        return score
+    except ValueError:
+        print(f"Model returned non-numeric score for '{title}': '{score_text}'")
+        return 5.0  # Default middle score instead of 0
+    except Exception as e:
+        print(f"Error scoring title '{title}': {e}")
+        if 'response' in locals() and hasattr(response, 'text'):
+             print(f"API response text: {response.text}")
+        return 5.0  # Default middle score
+
+
+def pick_best(query, num_results=20):
+    """
+    Find and score the best YouTube videos for a query.
+
+    Args:
+        query: Search query string
+        num_results: Number of top videos to return
+    """
+    # Get more videos than we need to ensure we have enough after scoring
+    vids = search_videos(query, max_filtered_results=max(30, num_results * 1.5))
+
+    if not vids:
+        print("No suitable videos found after applying filters.")
+        return
+
+    # Score each video
+    print(f"Scoring {len(vids)} videos...")
+    for i, v in enumerate(vids):
+        v["score"] = score_title(v["title"], query)
+        print(f"  Scored video {i+1}/{len(vids)}: '{v['title']}' - Score: {v['score']:.2f}")
+
+    # Sort by score in descending order
+    vids.sort(key=lambda x: x.get("score", 0.0), reverse=True)
+
+    # Print the top num_results
+    result_count = min(num_results, len(vids))
+    print(f"\n--- Top {result_count} Relevant Videos ---")
+
+    for i, video in enumerate(vids[:num_results]):
+        print(f"\n{i+1}.")
+        print(f" • Title: {video.get('title', 'N/A')}")
+        print(f" • URL:   https://youtu.be/{video.get('id', 'N/A')}")
+        print(f" • Score: {video.get('score', 0.0):.2f}")
+        duration_sec = video.get('duration', 0)
+        print(f" • Duration: {duration_sec // 60}m{duration_sec % 60:02d}s")
+        print(f" • Published: {video.get('publishedAt', 'N/A')}")
+
+
+# —— RUN IT! ——
+if __name__ == "__main__":
+    # Check if API keys are set
+    if "YT_API_KEY" not in os.environ or "GEMINI_API_KEY" not in os.environ:
+        print("Error: YouTube and/or Gemini API keys not set in environment variables.")
+    else:
+        user_query = input("Enter your search (voice-to-text or text): ")
+        # Call pick_best with the desired number of results
+        pick_best(user_query, num_results=20)