def _show_concepts(self):
    """Print the analyzer's key concepts as a numbered list.

    Reads ``self.analyzer.key_concepts`` and prints at most the first 15
    entries, numbered from 1. Each entry is subscripted with ``'term'``,
    ``'frequency'`` and ``'context'``; when ``'context'`` is non-empty, the
    first context snippet is shown, truncated to 150 characters.
    """
    print("\n🔑 KEY CONCEPTS:")
    for i, concept in enumerate(self.analyzer.key_concepts[:15], 1):
        # The placeholders must be wrapped in braces, otherwise the f-string
        # prints the expression text instead of its value.
        print(f"\n{i}. {concept['term'].upper()} (appears {concept['frequency']}x)")
        if concept['context']:
            print(f" Context: {concept['context'][0][:150]}...")
def _show_questions(self):
    """Print the study questions produced by the analyzer.

    Calls ``self.analyzer.generate_study_questions()`` and prints every
    returned entry numbered from 1, followed by its hint. Each entry is
    subscripted with ``'question'`` and ``'hint'``.
    """
    questions = self.analyzer.generate_study_questions()
    print("\n❓ STUDY QUESTIONS:")
    for i, q in enumerate(questions, 1):
        # Braces restored so the values are interpolated rather than the
        # expression text being printed verbatim.
        print(f"\n{i}. {q['question']}")
        print(f" 💡 Hint: {q['hint']}")
class UrbanPlanningNotesAnalyzer:
    """Extract text from a lecture-notes PDF and derive study material from it.

    The instance keeps the extracted text plus several collections
    (sections, key concepts, case studies) that the methods below read.
    """

    def __init__(self, pdf_path: str):
        """Remember the PDF location and start with empty analysis state.

        Args:
            pdf_path: Path of the PDF file opened by ``extract_text_from_pdf``.
        """
        self.pdf_path = pdf_path
        self.full_text = ""      # text of all pages, filled by extract_text_from_pdf
        self.pages_text = []     # one {'page_num', 'text'} dict per extracted page
        self.sections = {}       # section name -> section text
        self.key_concepts = []   # dicts read via 'term' and 'frequency'
        self.case_studies = []   # dicts read via 'title'

    def _extract_principles(self) -> List[str]:
        """Extract core urban planning principles.

        Runs three regular expressions over ``self.full_text``: phrases
        following "principle(s) of", text following "core concept(s):" /
        "core principle(s):", and whole sentences containing a normative
        keyword such as "should" or "must".

        Returns:
            Up to 10 matches (at most the first 5 per pattern, in pattern
            order). Runs of whitespace are collapsed, and empty or repeated
            matches are dropped.
        """
        principle_patterns = [
            r'(?i)principle[s]? of (.+?)[\.\n]',
            r'(?i)core (?:concept|principle)[s]?: (.+?)[\.\n]',
            r'(?i)([^.]*?(?:should|must|requires|essential|crucial|important)[^.]*?\.)',
        ]

        principles: List[str] = []
        seen = set()
        for pattern in principle_patterns:
            for match in re.findall(pattern, self.full_text)[:5]:
                # Sentence matches start right after the previous full stop,
                # so they carry leading spaces/newlines; normalise them.
                cleaned = " ".join(match.split())
                if cleaned and cleaned not in seen:
                    seen.add(cleaned)
                    principles.append(cleaned)
        return principles[:10]

    def extract_text_from_pdf(self) -> str:
        """Extract text from PDF file.

        Opens ``self.pdf_path`` with ``PyPDF2.PdfReader``, records every
        page's text in ``self.pages_text`` (1-based ``page_num``) and stores
        the newline-joined text of all pages in ``self.full_text``.

        Returns:
            The full text, each page followed by a newline.
        """
        # Rebuild from scratch so calling this twice does not duplicate pages.
        self.pages_text = []
        page_texts = []
        with open(self.pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page_num, page in enumerate(pdf_reader.pages, start=1):
                # Defensive: treat a falsy extraction result (e.g. None) as an
                # empty page instead of failing on the concatenation below.
                page_text = page.extract_text() or ""
                self.pages_text.append({
                    'page_num': page_num,
                    'text': page_text,
                })
                page_texts.append(page_text)

        text = "".join(f"{page_text}\n" for page_text in page_texts)
        self.full_text = text
        return text

    def generate_study_questions(self) -> List[Dict]:
        """Generate study questions based on key concepts and sections.

        Builds one 'concept' question for each of the first 10 key concepts,
        one 'section' question for each of the first 5 sections whose text is
        longer than 100 characters, and one 'comparative' question when at
        least two case studies are available.

        Returns:
            A list of dicts, each with 'type', 'question' and 'hint', plus
            'related_concept' or 'related_section' where applicable.
        """
        questions = []

        # Generate questions from key concepts
        for concept in self.key_concepts[:10]:
            term = concept['term']
            questions.append({
                'type': 'concept',
                'question': f"What are the key principles and applications of {term} in urban planning?",
                'related_concept': term,
                'hint': f"Review section discussing {term} (mentioned {concept['frequency']} times)",
            })

        # Generate questions from sections
        for section_name, section_text in list(self.sections.items())[:5]:
            if len(section_text) > 100:
                questions.append({
                    'type': 'section',
                    'question': f"Summarize the main arguments presented in '{section_name}' regarding urban planning approaches.",
                    'related_section': section_name,
                    'hint': "Focus on the key definitions and examples provided",
                })

        # Add comparative questions
        if len(self.case_studies) >= 2:
            first_title = self.case_studies[0]['title']
            second_title = self.case_studies[1]['title']
            questions.append({
                'type': 'comparative',
                'question': f"Compare and contrast the urban planning approaches in '{first_title}' vs '{second_title}'.",
                'hint': "Consider differences in context, implementation, and outcomes",
            })

        return questions
import json
import re
from collections import Counter
from typing import Dict, List, Tuple

import nltk
import pandas as pd
import PyPDF2
import spacy
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Download required NLTK data
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')

# Load spaCy model (run: python -m spacy download en_core_web_sm)
nlp = spacy.load('en_core_web_sm')