WeniVooks

검색

정규표현식 톺아보기 with JavaScript and Python

프로젝트 해답

1. 마크다운 파서 솔루션

1.1 JavaScript 구현
/**
 * Minimal Markdown-to-HTML converter driven by regular expressions.
 *
 * Handles ATX headers (one to six leading hashes), inline links,
 * double-asterisk bold and single-asterisk italic. The input is not
 * HTML-escaped.
 */
class MarkdownParser {
    constructor() {
        this.patterns = {
            // Only spaces/tabs may separate the hashes from the title. `\s+`
            // would also match newlines, so an empty "#" line would swallow
            // the following line and turn it into a header.
            headers: /^(#{1,6})[ \t]+(.+)$/gm,
            links: /\[([^\]]+)\]\(([^)]+)\)/g,
            bold: /\*\*([^*]+)\*\*/g,
            italic: /\*([^*]+)\*/g
        };
    }

    /**
     * Convert Markdown text to HTML.
     *
     * @param {string} markdown - Markdown source text.
     * @returns {string} The input with supported constructs replaced by HTML tags.
     */
    parse(markdown) {
        const { headers, links, bold, italic } = this.patterns;

        // Headers: the number of hashes selects the <hN> level.
        const withHeaders = markdown.replace(headers, (match, hashes, text) => {
            const level = hashes.length;
            return `<h${level}>${text}</h${level}>`;
        });

        // Links: [text](url) -> <a href="url">text</a>
        const withLinks = withHeaders.replace(
            links,
            (match, text, url) => `<a href="${url}">${text}</a>`
        );

        // Emphasis: bold must run before italic, otherwise the italic
        // pattern would consume the inner asterisks of a bold marker.
        return withLinks
            .replace(bold, (match, text) => `<strong>${text}</strong>`)
            .replace(italic, (match, text) => `<em>${text}</em>`);
    }
}
 
// Usage example: parse a small document and print the result.
const parser = new MarkdownParser();
// Sample input exercising headers, a link, bold and italic markup.
const markdown = `
# 메인 제목
## 부제목
 
이것은 [위니브](https://weniv.co.kr) 튜토리얼입니다.
**중요한** 내용과 *강조할* 내용이 있습니다.
`;
 
console.log(parser.parse(markdown));
/**
 * Minimal Markdown-to-HTML converter driven by regular expressions.
 *
 * Handles ATX headers (one to six leading hashes), inline links,
 * double-asterisk bold and single-asterisk italic. The input is not
 * HTML-escaped.
 */
class MarkdownParser {
    constructor() {
        this.patterns = {
            // Only spaces/tabs may separate the hashes from the title. `\s+`
            // would also match newlines, so an empty "#" line would swallow
            // the following line and turn it into a header.
            headers: /^(#{1,6})[ \t]+(.+)$/gm,
            links: /\[([^\]]+)\]\(([^)]+)\)/g,
            bold: /\*\*([^*]+)\*\*/g,
            italic: /\*([^*]+)\*/g
        };
    }

    /**
     * Convert Markdown text to HTML.
     *
     * @param {string} markdown - Markdown source text.
     * @returns {string} The input with supported constructs replaced by HTML tags.
     */
    parse(markdown) {
        const { headers, links, bold, italic } = this.patterns;

        // Headers: the number of hashes selects the <hN> level.
        const withHeaders = markdown.replace(headers, (match, hashes, text) => {
            const level = hashes.length;
            return `<h${level}>${text}</h${level}>`;
        });

        // Links: [text](url) -> <a href="url">text</a>
        const withLinks = withHeaders.replace(
            links,
            (match, text, url) => `<a href="${url}">${text}</a>`
        );

        // Emphasis: bold must run before italic, otherwise the italic
        // pattern would consume the inner asterisks of a bold marker.
        return withLinks
            .replace(bold, (match, text) => `<strong>${text}</strong>`)
            .replace(italic, (match, text) => `<em>${text}</em>`);
    }
}
 
// Usage example: parse a small document and print the result.
const parser = new MarkdownParser();
// Sample input exercising headers, a link, bold and italic markup.
const markdown = `
# 메인 제목
## 부제목
 
이것은 [위니브](https://weniv.co.kr) 튜토리얼입니다.
**중요한** 내용과 *강조할* 내용이 있습니다.
`;
 
console.log(parser.parse(markdown));
1.2 Python 구현
import re
 
class MarkdownParser:
    """Minimal Markdown-to-HTML converter driven by regular expressions.

    Handles ATX headers (one to six leading hashes), inline links,
    double-asterisk bold and single-asterisk italic. The input is not
    HTML-escaped.
    """

    def __init__(self):
        self.patterns = {
            # Only spaces/tabs may separate the hashes from the title; \s+
            # would also match newlines, so an empty "#" line would swallow
            # the following line and turn it into a header.
            'headers': re.compile(r'^(#{1,6})[ \t]+(.+)$', re.MULTILINE),
            'links': re.compile(r'\[([^\]]+)\]\(([^)]+)\)'),
            'bold': re.compile(r'\*\*([^*]+)\*\*'),
            # The pattern previously ended with a stray ")" which made
            # re.compile raise "unbalanced parenthesis" on construction.
            'italic': re.compile(r'\*([^*]+)\*')
        }

    def parse(self, markdown):
        """Convert Markdown text to HTML.

        Args:
            markdown: Markdown source text.

        Returns:
            The input with supported constructs replaced by HTML tags.
        """
        def header_replace(match):
            # The number of hashes selects the <hN> level.
            level = len(match.group(1))
            return f'<h{level}>{match.group(2)}</h{level}>'

        html = self.patterns['headers'].sub(header_replace, markdown)

        # Links: [text](url) -> <a href="url">text</a>
        html = self.patterns['links'].sub(r'<a href="\2">\1</a>', html)

        # Emphasis: bold must run before italic, otherwise the italic
        # pattern would consume the inner asterisks of a bold marker.
        html = self.patterns['bold'].sub(r'<strong>\1</strong>', html)
        html = self.patterns['italic'].sub(r'<em>\1</em>', html)

        return html
 
# Usage example: parse a small document and print the result.
parser = MarkdownParser()
# Sample input exercising headers, a link, bold and italic markup.
markdown = """
# 메인 제목
## 부제목
 
이것은 [위니브](https://weniv.co.kr) 튜토리얼입니다.
**중요한** 내용과 *강조할* 내용이 있습니다.
"""
 
print(parser.parse(markdown))
import re
 
class MarkdownParser:
    """Minimal Markdown-to-HTML converter driven by regular expressions.

    Handles ATX headers (one to six leading hashes), inline links,
    double-asterisk bold and single-asterisk italic. The input is not
    HTML-escaped.
    """

    def __init__(self):
        self.patterns = {
            # Only spaces/tabs may separate the hashes from the title; \s+
            # would also match newlines, so an empty "#" line would swallow
            # the following line and turn it into a header.
            'headers': re.compile(r'^(#{1,6})[ \t]+(.+)$', re.MULTILINE),
            'links': re.compile(r'\[([^\]]+)\]\(([^)]+)\)'),
            'bold': re.compile(r'\*\*([^*]+)\*\*'),
            # The pattern previously ended with a stray ")" which made
            # re.compile raise "unbalanced parenthesis" on construction.
            'italic': re.compile(r'\*([^*]+)\*')
        }

    def parse(self, markdown):
        """Convert Markdown text to HTML.

        Args:
            markdown: Markdown source text.

        Returns:
            The input with supported constructs replaced by HTML tags.
        """
        def header_replace(match):
            # The number of hashes selects the <hN> level.
            level = len(match.group(1))
            return f'<h{level}>{match.group(2)}</h{level}>'

        html = self.patterns['headers'].sub(header_replace, markdown)

        # Links: [text](url) -> <a href="url">text</a>
        html = self.patterns['links'].sub(r'<a href="\2">\1</a>', html)

        # Emphasis: bold must run before italic, otherwise the italic
        # pattern would consume the inner asterisks of a bold marker.
        html = self.patterns['bold'].sub(r'<strong>\1</strong>', html)
        html = self.patterns['italic'].sub(r'<em>\1</em>', html)

        return html
 
# Usage example: parse a small document and print the result.
parser = MarkdownParser()
# Sample input exercising headers, a link, bold and italic markup.
markdown = """
# 메인 제목
## 부제목
 
이것은 [위니브](https://weniv.co.kr) 튜토리얼입니다.
**중요한** 내용과 *강조할* 내용이 있습니다.
"""
 
print(parser.parse(markdown))

2. 로그 파일 분석기 솔루션

2.1 JavaScript 구현
/**
 * Aggregates per-IP, per-status, per-page and per-hour hit counts from
 * access-log text whose lines look like:
 *   IP ident user [dd/Mon/yyyy:HH:MM:SS +zzzz] "METHOD path protocol" status bytes
 */
class LogAnalyzer {
    constructor() {
        // Capture groups: 1 = client IP, 2 = timestamp, 3 = HTTP method,
        // 4 = requested path, 5 = status code, 6 = response size.
        this.pattern = /^(\S+) \S+ \S+ \[([\w:/]+\s[+\-]\d{4})\] "(\S+) (\S+) \S+" (\d{3}) (\d+)$/;
    }

    /**
     * Analyze a block of log text.
     *
     * Blank lines and lines that do not match the expected format are skipped.
     *
     * @param {string} logs - Log text, one entry per line (LF or CRLF).
     * @returns {{ipStats: Object, statusStats: Object, pageStats: Object, hourlyStats: Object}}
     *   Hit counts keyed by IP, status code, page (five most-hit pages only)
     *   and hour of day as written in the log entry.
     */
    analyze(logs) {
        const ipCounts = {};
        const statusCodes = {};
        const pageHits = {};
        const hourlyHits = {};
        // Locates the HH field of "...:HH:MM:SS" inside the timestamp.
        const hourPattern = /:(\d{2}):\d{2}:\d{2}/;

        // Split on CRLF as well as LF so a trailing "\r" does not make every
        // line fail the `$`-anchored pattern.
        for (const line of logs.split(/\r?\n/)) {
            if (!line.trim()) continue;

            const match = this.pattern.exec(line);
            if (!match) continue;

            const [, ip, timestamp, , page, status] = match;

            ipCounts[ip] = (ipCounts[ip] || 0) + 1;
            statusCodes[status] = (statusCodes[status] || 0) + 1;
            pageHits[page] = (pageHits[page] || 0) + 1;

            // Read the hour straight from the timestamp text. Going through
            // `new Date(...)` relies on implementation-defined parsing of a
            // non-ISO string and yields the hour in the runtime's local time
            // zone rather than the hour recorded in the log.
            const hourMatch = hourPattern.exec(timestamp);
            if (hourMatch) {
                const hour = Number.parseInt(hourMatch[1], 10);
                hourlyHits[hour] = (hourlyHits[hour] || 0) + 1;
            }
        }

        return {
            ipStats: ipCounts,
            statusStats: statusCodes,
            pageStats: this.getTop5(pageHits),
            hourlyStats: hourlyHits
        };
    }

    /**
     * Keep the five entries with the highest counts.
     *
     * @param {Object<string, number>} data - Counts keyed by name.
     * @returns {Object<string, number>} A new object with at most five entries.
     */
    getTop5(data) {
        const top = {};
        const ranked = Object.entries(data)
            .sort(([, a], [, b]) => b - a)
            .slice(0, 5);
        for (const [key, count] of ranked) {
            top[key] = count;
        }
        return top;
    }
}
/**
 * Aggregates per-IP, per-status, per-page and per-hour hit counts from
 * access-log text whose lines look like:
 *   IP ident user [dd/Mon/yyyy:HH:MM:SS +zzzz] "METHOD path protocol" status bytes
 */
class LogAnalyzer {
    constructor() {
        // Capture groups: 1 = client IP, 2 = timestamp, 3 = HTTP method,
        // 4 = requested path, 5 = status code, 6 = response size.
        this.pattern = /^(\S+) \S+ \S+ \[([\w:/]+\s[+\-]\d{4})\] "(\S+) (\S+) \S+" (\d{3}) (\d+)$/;
    }

    /**
     * Analyze a block of log text.
     *
     * Blank lines and lines that do not match the expected format are skipped.
     *
     * @param {string} logs - Log text, one entry per line (LF or CRLF).
     * @returns {{ipStats: Object, statusStats: Object, pageStats: Object, hourlyStats: Object}}
     *   Hit counts keyed by IP, status code, page (five most-hit pages only)
     *   and hour of day as written in the log entry.
     */
    analyze(logs) {
        const ipCounts = {};
        const statusCodes = {};
        const pageHits = {};
        const hourlyHits = {};
        // Locates the HH field of "...:HH:MM:SS" inside the timestamp.
        const hourPattern = /:(\d{2}):\d{2}:\d{2}/;

        // Split on CRLF as well as LF so a trailing "\r" does not make every
        // line fail the `$`-anchored pattern.
        for (const line of logs.split(/\r?\n/)) {
            if (!line.trim()) continue;

            const match = this.pattern.exec(line);
            if (!match) continue;

            const [, ip, timestamp, , page, status] = match;

            ipCounts[ip] = (ipCounts[ip] || 0) + 1;
            statusCodes[status] = (statusCodes[status] || 0) + 1;
            pageHits[page] = (pageHits[page] || 0) + 1;

            // Read the hour straight from the timestamp text. Going through
            // `new Date(...)` relies on implementation-defined parsing of a
            // non-ISO string and yields the hour in the runtime's local time
            // zone rather than the hour recorded in the log.
            const hourMatch = hourPattern.exec(timestamp);
            if (hourMatch) {
                const hour = Number.parseInt(hourMatch[1], 10);
                hourlyHits[hour] = (hourlyHits[hour] || 0) + 1;
            }
        }

        return {
            ipStats: ipCounts,
            statusStats: statusCodes,
            pageStats: this.getTop5(pageHits),
            hourlyStats: hourlyHits
        };
    }

    /**
     * Keep the five entries with the highest counts.
     *
     * @param {Object<string, number>} data - Counts keyed by name.
     * @returns {Object<string, number>} A new object with at most five entries.
     */
    getTop5(data) {
        const top = {};
        const ranked = Object.entries(data)
            .sort(([, a], [, b]) => b - a)
            .slice(0, 5);
        for (const [key, count] of ranked) {
            top[key] = count;
        }
        return top;
    }
}
2.2 Python 구현
import re
from collections import Counter
from datetime import datetime
 
class LogAnalyzer:
    """Aggregates hit counts from access-log text.

    Each line is expected to look like:
        IP ident user [dd/Mon/yyyy:HH:MM:SS +zzzz] "METHOD path protocol" status bytes
    """

    def __init__(self):
        # Capture groups: 1 = client IP, 2 = timestamp, 3 = HTTP method,
        # 4 = requested path, 5 = status code, 6 = response size.
        self.pattern = re.compile(
            r'^(\S+) \S+ \S+ \[([\w:/]+\s[+\-]\d{4})\] "(\S+) (\S+) \S+" (\d{3}) (\d+)$'
        )

    def analyze(self, logs):
        """Analyze a block of log text.

        Blank lines and lines that do not match the expected format are
        skipped. An entry whose timestamp cannot be parsed still counts
        towards the IP, status and page statistics but not the hourly ones.

        Args:
            logs: Log text, one entry per line.

        Returns:
            A dict with 'ip_stats', 'status_stats', 'page_stats' (the five
            most-hit pages) and 'hourly_stats' (keyed by integer hour as
            written in the log entry).
        """
        ip_counts = Counter()
        status_codes = Counter()
        page_hits = Counter()
        hourly_hits = Counter()

        # splitlines() also handles CRLF input; with split('\n') a trailing
        # '\r' stayed on each line and made the '$'-anchored pattern fail.
        for line in logs.splitlines():
            if not line.strip():
                continue

            match = self.pattern.match(line)
            if not match:
                continue

            # Underscore-prefixed names are unused; this also avoids
            # shadowing the builtin `bytes`.
            ip, timestamp, _method, page, status, _size = match.groups()

            ip_counts[ip] += 1
            status_codes[status] += 1
            page_hits[page] += 1

            # Drop the UTC offset and parse the remaining date/time part.
            try:
                hour = datetime.strptime(
                    timestamp.split()[0],
                    '%d/%b/%Y:%H:%M:%S'
                ).hour
            except ValueError:
                # The line pattern accepts any word characters in the
                # timestamp; one malformed value must not abort the run.
                continue
            hourly_hits[hour] += 1

        return {
            'ip_stats': dict(ip_counts),
            'status_stats': dict(status_codes),
            'page_stats': dict(page_hits.most_common(5)),
            'hourly_stats': dict(hourly_hits)
        }
import re
from collections import Counter
from datetime import datetime
 
class LogAnalyzer:
    """Aggregates hit counts from access-log text.

    Each line is expected to look like:
        IP ident user [dd/Mon/yyyy:HH:MM:SS +zzzz] "METHOD path protocol" status bytes
    """

    def __init__(self):
        # Capture groups: 1 = client IP, 2 = timestamp, 3 = HTTP method,
        # 4 = requested path, 5 = status code, 6 = response size.
        self.pattern = re.compile(
            r'^(\S+) \S+ \S+ \[([\w:/]+\s[+\-]\d{4})\] "(\S+) (\S+) \S+" (\d{3}) (\d+)$'
        )

    def analyze(self, logs):
        """Analyze a block of log text.

        Blank lines and lines that do not match the expected format are
        skipped. An entry whose timestamp cannot be parsed still counts
        towards the IP, status and page statistics but not the hourly ones.

        Args:
            logs: Log text, one entry per line.

        Returns:
            A dict with 'ip_stats', 'status_stats', 'page_stats' (the five
            most-hit pages) and 'hourly_stats' (keyed by integer hour as
            written in the log entry).
        """
        ip_counts = Counter()
        status_codes = Counter()
        page_hits = Counter()
        hourly_hits = Counter()

        # splitlines() also handles CRLF input; with split('\n') a trailing
        # '\r' stayed on each line and made the '$'-anchored pattern fail.
        for line in logs.splitlines():
            if not line.strip():
                continue

            match = self.pattern.match(line)
            if not match:
                continue

            # Underscore-prefixed names are unused; this also avoids
            # shadowing the builtin `bytes`.
            ip, timestamp, _method, page, status, _size = match.groups()

            ip_counts[ip] += 1
            status_codes[status] += 1
            page_hits[page] += 1

            # Drop the UTC offset and parse the remaining date/time part.
            try:
                hour = datetime.strptime(
                    timestamp.split()[0],
                    '%d/%b/%Y:%H:%M:%S'
                ).hour
            except ValueError:
                # The line pattern accepts any word characters in the
                # timestamp; one malformed value must not abort the run.
                continue
            hourly_hits[hour] += 1

        return {
            'ip_stats': dict(ip_counts),
            'status_stats': dict(status_codes),
            'page_stats': dict(page_hits.most_common(5)),
            'hourly_stats': dict(hourly_hits)
        }

3. 데이터 정제 변환기 솔루션

3.1 JavaScript 구현
/**
 * Normalizes dates, e-mail addresses and Korean phone numbers that arrive
 * in assorted textual formats.
 */
class DataCleaner {
    constructor() {
        const months = {
            Jan: '01', Feb: '02', Mar: '03', Apr: '04',
            May: '05', Jun: '06', Jul: '07', Aug: '08',
            Sep: '09', Oct: '10', Nov: '11', Dec: '12'
        };

        // Tried in order; the first pattern whose formatter yields a string wins.
        this.datePatterns = [
            {
                // YYYY/MM/DD
                pattern: /(\d{4})\/(\d{2})\/(\d{2})/,
                formatter: (m) => `${m[1]}-${m[2]}-${m[3]}`
            },
            {
                // NN-NN-YYYY; the first pair is emitted as the month,
                // i.e. the input is read as MM-DD-YYYY.
                pattern: /(\d{2})-(\d{2})-(\d{4})/,
                formatter: (m) => `${m[3]}-${m[1]}-${m[2]}`
            },
            {
                // Mon DD, YYYY (three-letter English month, any letter case)
                pattern: /([A-Za-z]{3})\s+(\d{2}),\s+(\d{4})/,
                formatter: (m) => {
                    const name = m[1][0].toUpperCase() + m[1].slice(1).toLowerCase();
                    const month = months[name];
                    // Unknown abbreviation: report "no result" instead of
                    // emitting the literal text "undefined" in the date.
                    return month === undefined ? undefined : `${m[3]}-${month}-${m[2]}`;
                }
            }
        ];
    }

    /**
     * Rewrite a date as YYYY-MM-DD.
     *
     * @param {string} date - Text containing a date in a supported format.
     * @returns {string} The normalized date, or `date` unchanged when no
     *   supported format is recognized.
     */
    normalizeDate(date) {
        for (const { pattern, formatter } of this.datePatterns) {
            const match = date.match(pattern);
            if (!match) continue;

            const formatted = formatter(match);
            if (formatted !== undefined) {
                return formatted;
            }
        }
        return date;
    }

    /**
     * Lower-case an e-mail address, collapse repeated dots, drop a "+tag"
     * suffix from the local part and remove a dot directly before "@".
     *
     * @param {string} email - Raw e-mail address.
     * @returns {string} The normalized address.
     */
    normalizeEmail(email) {
        return email
            .toLowerCase()
            .replace(/\.+/g, '.')
            .replace(/\+[^@]+@/, '@')
            .replace(/\.@/, '@');
    }

    /**
     * Format a phone number with hyphens.
     *
     * @param {string} phone - Raw number; may contain spaces, hyphens,
     *   parentheses and a leading "+82" country code.
     * @returns {string} The hyphenated number, or the cleaned digits when
     *   they do not fit a known layout.
     */
    normalizePhone(phone) {
        let digits = phone.replace(/[\s\-\(\)]/g, '');

        // Convert the country code to the domestic "0" prefix first and then
        // fall through, so "+82 2 ..." landlines reach the "02" branch below
        // instead of being forced through the 3-4-4 mobile layout.
        if (digits.startsWith('+82')) {
            digits = `0${digits.slice(3)}`;
        }

        // Numbers starting with the two-digit "02" area code: 2-(3|4)-4.
        if (digits.startsWith('02')) {
            return digits.replace(/^(\d{2})(\d{3,4})(\d{4})/, '$1-$2-$3');
        }

        // Everything else (e.g. mobile numbers): 3-4-4.
        return digits.replace(/(\d{3})(\d{4})(\d{4})/, '$1-$2-$3');
    }
}
/**
 * Normalizes dates, e-mail addresses and Korean phone numbers that arrive
 * in assorted textual formats.
 */
class DataCleaner {
    constructor() {
        const months = {
            Jan: '01', Feb: '02', Mar: '03', Apr: '04',
            May: '05', Jun: '06', Jul: '07', Aug: '08',
            Sep: '09', Oct: '10', Nov: '11', Dec: '12'
        };

        // Tried in order; the first pattern whose formatter yields a string wins.
        this.datePatterns = [
            {
                // YYYY/MM/DD
                pattern: /(\d{4})\/(\d{2})\/(\d{2})/,
                formatter: (m) => `${m[1]}-${m[2]}-${m[3]}`
            },
            {
                // NN-NN-YYYY; the first pair is emitted as the month,
                // i.e. the input is read as MM-DD-YYYY.
                pattern: /(\d{2})-(\d{2})-(\d{4})/,
                formatter: (m) => `${m[3]}-${m[1]}-${m[2]}`
            },
            {
                // Mon DD, YYYY (three-letter English month, any letter case)
                pattern: /([A-Za-z]{3})\s+(\d{2}),\s+(\d{4})/,
                formatter: (m) => {
                    const name = m[1][0].toUpperCase() + m[1].slice(1).toLowerCase();
                    const month = months[name];
                    // Unknown abbreviation: report "no result" instead of
                    // emitting the literal text "undefined" in the date.
                    return month === undefined ? undefined : `${m[3]}-${month}-${m[2]}`;
                }
            }
        ];
    }

    /**
     * Rewrite a date as YYYY-MM-DD.
     *
     * @param {string} date - Text containing a date in a supported format.
     * @returns {string} The normalized date, or `date` unchanged when no
     *   supported format is recognized.
     */
    normalizeDate(date) {
        for (const { pattern, formatter } of this.datePatterns) {
            const match = date.match(pattern);
            if (!match) continue;

            const formatted = formatter(match);
            if (formatted !== undefined) {
                return formatted;
            }
        }
        return date;
    }

    /**
     * Lower-case an e-mail address, collapse repeated dots, drop a "+tag"
     * suffix from the local part and remove a dot directly before "@".
     *
     * @param {string} email - Raw e-mail address.
     * @returns {string} The normalized address.
     */
    normalizeEmail(email) {
        return email
            .toLowerCase()
            .replace(/\.+/g, '.')
            .replace(/\+[^@]+@/, '@')
            .replace(/\.@/, '@');
    }

    /**
     * Format a phone number with hyphens.
     *
     * @param {string} phone - Raw number; may contain spaces, hyphens,
     *   parentheses and a leading "+82" country code.
     * @returns {string} The hyphenated number, or the cleaned digits when
     *   they do not fit a known layout.
     */
    normalizePhone(phone) {
        let digits = phone.replace(/[\s\-\(\)]/g, '');

        // Convert the country code to the domestic "0" prefix first and then
        // fall through, so "+82 2 ..." landlines reach the "02" branch below
        // instead of being forced through the 3-4-4 mobile layout.
        if (digits.startsWith('+82')) {
            digits = `0${digits.slice(3)}`;
        }

        // Numbers starting with the two-digit "02" area code: 2-(3|4)-4.
        if (digits.startsWith('02')) {
            return digits.replace(/^(\d{2})(\d{3,4})(\d{4})/, '$1-$2-$3');
        }

        // Everything else (e.g. mobile numbers): 3-4-4.
        return digits.replace(/(\d{3})(\d{4})(\d{4})/, '$1-$2-$3');
    }
}
3.2 Python 구현
import re
from datetime import datetime
 
class DataCleaner:
    """Normalizes dates, e-mail addresses and Korean phone numbers that
    arrive in assorted textual formats."""

    def __init__(self):
        # (compiled pattern, formatter) pairs, tried in order.
        self.date_patterns = [
            # YYYY/MM/DD
            (
                re.compile(r'(\d{4})/(\d{2})/(\d{2})'),
                lambda m: f"{m.group(1)}-{m.group(2)}-{m.group(3)}"
            ),
            # DD-MM-YYYY
            (
                re.compile(r'(\d{2})-(\d{2})-(\d{4})'),
                lambda m: f"{m.group(3)}-{m.group(2)}-{m.group(1)}"
            ),
            # Mon DD, YYYY (three-letter month abbreviation)
            (
                re.compile(r'([A-Za-z]{3})\s+(\d{2}),\s+(\d{4})'),
                lambda m: datetime.strptime(
                    f"{m.group(1)} {m.group(2)} {m.group(3)}",
                    "%b %d %Y"
                ).strftime("%Y-%m-%d")
            )
        ]

    def normalize_date(self, date):
        """Rewrite a date as YYYY-MM-DD.

        Patterns are matched at the start of the string.

        Args:
            date: Text beginning with a date in a supported format.

        Returns:
            The normalized date, or `date` unchanged when no supported
            format is recognized.
        """
        for pattern, formatter in self.date_patterns:
            match = pattern.match(date)
            if not match:
                continue
            try:
                return formatter(match)
            except ValueError:
                # strptime rejected the text (unknown month abbreviation or
                # an impossible day); treat it like any other unrecognized
                # input rather than raising to the caller.
                continue
        return date

    def normalize_email(self, email):
        """Lower-case an address, collapse repeated dots, drop a "+tag"
        suffix from the local part and remove a dot directly before "@".

        Args:
            email: Raw e-mail address.

        Returns:
            The normalized address.
        """
        email = email.lower()

        # Collapse every run of consecutive dots into a single dot.
        email = re.sub(r'\.{2,}', '.', email)

        # Strip the "+tag" part that precedes the "@".
        email = re.sub(r'\+[^@]+@', '@', email)

        # Remove a dot left directly in front of the "@".
        return email.replace('.@', '@')

    def normalize_phone(self, phone):
        """Format a phone number with hyphens.

        Args:
            phone: Raw number; may contain whitespace, hyphens, parentheses
                and a leading "+82" country code.

        Returns:
            The hyphenated number, or the cleaned digits when they do not
            fit a known layout.
        """
        # Remove whitespace, hyphens and parentheses.
        cleaned = re.sub(r'[\s\-\(\)]', '', phone)

        # Replace the country code with the domestic "0" prefix.
        if cleaned.startswith('+82'):
            cleaned = '0' + cleaned[3:]

        # Numbers starting with the two-digit "02" area code: 2-(3|4)-4.
        if cleaned.startswith('02'):
            return re.sub(
                r'^(\d{2})(\d{3,4})(\d{4})$',
                r'\1-\2-\3',
                cleaned
            )

        # Everything else (e.g. mobile numbers): 3-4-4.
        return re.sub(
            r'(\d{3})(\d{4})(\d{4})',
            r'\1-\2-\3',
            cleaned
        )
import re
from datetime import datetime
 
class DataCleaner:
    """Normalizes dates, e-mail addresses and Korean phone numbers that
    arrive in assorted textual formats."""

    def __init__(self):
        # (compiled pattern, formatter) pairs, tried in order.
        self.date_patterns = [
            # YYYY/MM/DD
            (
                re.compile(r'(\d{4})/(\d{2})/(\d{2})'),
                lambda m: f"{m.group(1)}-{m.group(2)}-{m.group(3)}"
            ),
            # DD-MM-YYYY
            (
                re.compile(r'(\d{2})-(\d{2})-(\d{4})'),
                lambda m: f"{m.group(3)}-{m.group(2)}-{m.group(1)}"
            ),
            # Mon DD, YYYY (three-letter month abbreviation)
            (
                re.compile(r'([A-Za-z]{3})\s+(\d{2}),\s+(\d{4})'),
                lambda m: datetime.strptime(
                    f"{m.group(1)} {m.group(2)} {m.group(3)}",
                    "%b %d %Y"
                ).strftime("%Y-%m-%d")
            )
        ]

    def normalize_date(self, date):
        """Rewrite a date as YYYY-MM-DD.

        Patterns are matched at the start of the string.

        Args:
            date: Text beginning with a date in a supported format.

        Returns:
            The normalized date, or `date` unchanged when no supported
            format is recognized.
        """
        for pattern, formatter in self.date_patterns:
            match = pattern.match(date)
            if not match:
                continue
            try:
                return formatter(match)
            except ValueError:
                # strptime rejected the text (unknown month abbreviation or
                # an impossible day); treat it like any other unrecognized
                # input rather than raising to the caller.
                continue
        return date

    def normalize_email(self, email):
        """Lower-case an address, collapse repeated dots, drop a "+tag"
        suffix from the local part and remove a dot directly before "@".

        Args:
            email: Raw e-mail address.

        Returns:
            The normalized address.
        """
        email = email.lower()

        # Collapse every run of consecutive dots into a single dot.
        email = re.sub(r'\.{2,}', '.', email)

        # Strip the "+tag" part that precedes the "@".
        email = re.sub(r'\+[^@]+@', '@', email)

        # Remove a dot left directly in front of the "@".
        return email.replace('.@', '@')

    def normalize_phone(self, phone):
        """Format a phone number with hyphens.

        Args:
            phone: Raw number; may contain whitespace, hyphens, parentheses
                and a leading "+82" country code.

        Returns:
            The hyphenated number, or the cleaned digits when they do not
            fit a known layout.
        """
        # Remove whitespace, hyphens and parentheses.
        cleaned = re.sub(r'[\s\-\(\)]', '', phone)

        # Replace the country code with the domestic "0" prefix.
        if cleaned.startswith('+82'):
            cleaned = '0' + cleaned[3:]

        # Numbers starting with the two-digit "02" area code: 2-(3|4)-4.
        if cleaned.startswith('02'):
            return re.sub(
                r'^(\d{2})(\d{3,4})(\d{4})$',
                r'\1-\2-\3',
                cleaned
            )

        # Everything else (e.g. mobile numbers): 3-4-4.
        return re.sub(
            r'(\d{3})(\d{4})(\d{4})',
            r'\1-\2-\3',
            cleaned
        )
3.1 실무 활용 프로젝트