Linux内核：mmap_pgoff在mmap.c中的实现

2025/3/16 15:46:01

1. mmap_pgoff的系统调用实现如下

/*
 * mmap_pgoff system call entry point.  The file offset arrives in units
 * of pages (@pgoff), not bytes; all real work is delegated to
 * ksys_mmap_pgoff().
 */
SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
		unsigned long, prot, unsigned long, flags,
		unsigned long, fd, unsigned long, pgoff)
{
	return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
}

2. ksys_mmap_pgoff函数

unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,unsigned long prot, unsigned long flags,unsigned long fd, unsigned long pgoff)
{struct file *file = NULL;unsigned long retval;if (!(flags & MAP_ANONYMOUS)) {audit_mmap_fd(fd, flags);file = fget(fd);if (!file)return -EBADF;if (is_file_hugepages(file)) {len = ALIGN(len, huge_page_size(hstate_file(file)));} else if (unlikely(flags & MAP_HUGETLB)) {retval = -EINVAL;goto out_fput;}} else if (flags & MAP_HUGETLB) {struct hstate *hs;hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);if (!hs)return -EINVAL;len = ALIGN(len, huge_page_size(hs));/** VM_NORESERVE is used because the reservations will be* taken when vm_ops->mmap() is called*/file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,VM_NORESERVE,HUGETLB_ANONHUGE_INODE,(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);if (IS_ERR(file))return PTR_ERR(file);}retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
out_fput:if (file)fput(file);return retval;
}

vm_mmap_pgoff函数

/*
 * Locking wrapper around do_mmap(): run the LSM security_mmap_file()
 * check, take mm->mmap_lock for writing (killable), perform the mapping,
 * then complete any userfaultfd unmaps and, if requested
 * (VM_LOCKED/MAP_POPULATE), pre-fault the new range with mm_populate().
 *
 * Returns the mapped address, or a negative errno (-EINTR if the caller
 * was killed while waiting for the lock, or the error from
 * security_mmap_file()/do_mmap()).
 */
unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot,
	unsigned long flag, unsigned long pgoff)
{
	unsigned long ret;
	struct mm_struct *mm = current->mm;
	unsigned long populate;
	LIST_HEAD(uf);

	ret = security_mmap_file(file, prot, flag);
	if (!ret) {
		if (mmap_write_lock_killable(mm))
			return -EINTR;
		ret = do_mmap(file, addr, len, prot, flag, 0, pgoff,
			      &populate, &uf);
		mmap_write_unlock(mm);
		/* Must run without mmap_lock held. */
		userfaultfd_unmap_complete(mm, &uf);
		if (populate)
			mm_populate(ret, populate);
	}
	return ret;
}

do_mmap函数

/*
 * do_mmap() - validate an mmap request and compute the final vm_flags,
 * then establish the mapping via mmap_region().
 *
 * The caller must write-lock current->mm->mmap_lock.
 *
 * Visible steps, in order:
 *  - reject len == 0; apply READ_IMPLIES_EXEC personality (unless the
 *    backing filesystem is mounted noexec);
 *  - MAP_FIXED_NOREPLACE implies MAP_FIXED for get_unmapped_area(), and
 *    later fails with -EEXIST if the range intersects an existing VMA;
 *  - page-align len, check pgoff+len overflow (-EOVERFLOW) and the
 *    sysctl_max_map_count limit (-ENOMEM);
 *  - pick/validate the address with get_unmapped_area();
 *  - build vm_flags from prot/flags/mm->def_flags; enforce mlock limits
 *    (-EPERM / -EAGAIN);
 *  - file-backed: validate MAP_SHARED / MAP_SHARED_VALIDATE / MAP_PRIVATE
 *    against f_mode, append-only and swapfile inodes, noexec paths, and
 *    the file's mmap_supported_flags (note the deliberate fallthroughs);
 *  - anonymous: MAP_SHARED clears pgoff, MAP_PRIVATE derives pgoff from
 *    addr for anon_vma;
 *  - MAP_NORESERVE sets VM_NORESERVE when overcommit policy allows, and
 *    always for hugetlb files.
 *
 * On success *populate is set to len when the range should be pre-faulted
 * (VM_LOCKED, or MAP_POPULATE without MAP_NONBLOCK); otherwise it stays 0.
 * Returns the mapped address from mmap_region(), or a negative errno.
 */
unsigned long do_mmap(struct file *file, unsigned long addr,unsigned long len, unsigned long prot,unsigned long flags, vm_flags_t vm_flags,unsigned long pgoff, unsigned long *populate,struct list_head *uf)
{struct mm_struct *mm = current->mm;int pkey = 0;*populate = 0;if (!len)return -EINVAL;/** Does the application expect PROT_READ to imply PROT_EXEC?** (the exception is when the underlying filesystem is noexec*  mounted, in which case we dont add PROT_EXEC.)*/if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))if (!(file && path_noexec(&file->f_path)))prot |= PROT_EXEC;/* force arch specific MAP_FIXED handling in get_unmapped_area */if (flags & MAP_FIXED_NOREPLACE)flags |= MAP_FIXED;if (!(flags & MAP_FIXED))addr = round_hint_to_min(addr);/* Careful about overflows.. */len = PAGE_ALIGN(len);if (!len)return -ENOMEM;/* offset overflow? */if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)return -EOVERFLOW;/* Too many mappings? */if (mm->map_count > sysctl_max_map_count)return -ENOMEM;/* Obtain the address to map to. we verify (or select) it and ensure* that it represents a valid section of the address space.*/addr = get_unmapped_area(file, addr, len, pgoff, flags);if (IS_ERR_VALUE(addr))return addr;if (flags & MAP_FIXED_NOREPLACE) {if (find_vma_intersection(mm, addr, addr + len))return -EEXIST;}if (prot == PROT_EXEC) {pkey = execute_only_pkey(mm);if (pkey < 0)pkey = 0;}/* Do simple checking here so the lower-level routines won't have* to. we assume access permissions have been handled by the open* of the memory object, so we don't do any here.*/vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;if (flags & MAP_LOCKED)if (!can_do_mlock())return -EPERM;if (!mlock_future_ok(mm, vm_flags, len))return -EAGAIN;if (file) {struct inode *inode = file_inode(file);unsigned long flags_mask;if (!file_mmap_ok(file, inode, pgoff, len))return -EOVERFLOW;flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;switch (flags & MAP_TYPE) {case MAP_SHARED:/** Force use of MAP_SHARED_VALIDATE with non-legacy* flags. E.g. 
MAP_SYNC is dangerous to use with* MAP_SHARED as you don't know which consistency model* you will get. We silently ignore unsupported flags* with MAP_SHARED to preserve backward compatibility.*/flags &= LEGACY_MAP_MASK;fallthrough;case MAP_SHARED_VALIDATE:if (flags & ~flags_mask)return -EOPNOTSUPP;if (prot & PROT_WRITE) {if (!(file->f_mode & FMODE_WRITE))return -EACCES;if (IS_SWAPFILE(file->f_mapping->host))return -ETXTBSY;}/** Make sure we don't allow writing to an append-only* file..*/if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))return -EACCES;vm_flags |= VM_SHARED | VM_MAYSHARE;if (!(file->f_mode & FMODE_WRITE))vm_flags &= ~(VM_MAYWRITE | VM_SHARED);fallthrough;case MAP_PRIVATE:if (!(file->f_mode & FMODE_READ))return -EACCES;if (path_noexec(&file->f_path)) {if (vm_flags & VM_EXEC)return -EPERM;vm_flags &= ~VM_MAYEXEC;}if (!file->f_op->mmap)return -ENODEV;if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))return -EINVAL;break;default:return -EINVAL;}} else {switch (flags & MAP_TYPE) {case MAP_SHARED:if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))return -EINVAL;/** Ignore pgoff.*/pgoff = 0;vm_flags |= VM_SHARED | VM_MAYSHARE;break;case MAP_PRIVATE:/** Set pgoff according to addr for anon_vma.*/pgoff = addr >> PAGE_SHIFT;break;default:return -EINVAL;}}/** Set 'VM_NORESERVE' if we should not account for the* memory use of this mapping.*/if (flags & MAP_NORESERVE) {/* We honor MAP_NORESERVE if allowed to overcommit */if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)vm_flags |= VM_NORESERVE;/* hugetlb applies strict overcommit unless MAP_NORESERVE */if (file && is_file_hugepages(file))vm_flags |= VM_NORESERVE;}addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);if (!IS_ERR_VALUE(addr) &&((vm_flags & VM_LOCKED) ||(flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))*populate = len;return addr;
}

get_unmapped_area函数

do_mmap函数中会使用get_unmapped_area获取addr

get_unmapped_area
unsigned long
get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,unsigned long pgoff, unsigned long flags)
{unsigned long (*get_area)(struct file *, unsigned long,unsigned long, unsigned long, unsigned long);unsigned long error = arch_mmap_check(addr, len, flags);if (error)return error;/* Careful about overflows.. */if (len > TASK_SIZE)return -ENOMEM;get_area = current->mm->get_unmapped_area;if (file) {if (file->f_op->get_unmapped_area)get_area = file->f_op->get_unmapped_area;} else if (flags & MAP_SHARED) {/** mmap_region() will call shmem_zero_setup() to create a file,* so use shmem's get_unmapped_area in case it can be huge.* do_mmap() will clear pgoff, so match alignment.*/pgoff = 0;get_area = shmem_get_unmapped_area;}addr = get_area(file, addr, len, pgoff, flags);if (IS_ERR_VALUE(addr))return addr;if (addr > TASK_SIZE - len)return -ENOMEM;if (offset_in_page(addr))return -EINVAL;error = security_mmap_addr(addr);return error ? error : addr;
}

mmap_region函数

get_unmapped_area获取到addr之后，将addr传给mmap_region函数

/*
 * mmap_region() - actually establish the mapping over [addr, addr+len).
 *
 * Steps visible below:
 *  - check may_expand_vm() against the address-space limit (accounting
 *    for pages a MAP_FIXED request would unmap);
 *  - unmap any existing mappings in the range (do_vmi_munmap());
 *  - for accountable private-writable mappings, charge the memory via
 *    security_vm_enough_memory_mm() and set VM_ACCOUNT;
 *  - try to extend an adjacent prev/next VMA in place (vma_expand())
 *    when mergeable; otherwise fall through to cannot_expand and build
 *    a new VMA: vm_area_alloc(), then call_mmap() for file mappings or
 *    shmem_zero_setup() for shared-anonymous, retrying vma_merge() if
 *    ->mmap() changed vm_flags;
 *  - validate flags (map_deny_write_exec(), arch_validate_flags()),
 *    insert the VMA into the maple tree (vma_iter_store()) and, for file
 *    mappings, into the file's i_mmap interval tree;
 *  - update stats (vm_stat_account(), locked_vm), set VM_SOFTDIRTY and
 *    the page protection, and return addr.
 *
 * Error paths unwind via the labels at the bottom, falling through in
 * order: close_and_free_vma -> unmap_and_free_vma -> free_vma ->
 * unacct_error.  Returns addr on success or a negative errno.
 */
unsigned long mmap_region(struct file *file, unsigned long addr,unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,struct list_head *uf)
{struct mm_struct *mm = current->mm;struct vm_area_struct *vma = NULL;struct vm_area_struct *next, *prev, *merge;pgoff_t pglen = len >> PAGE_SHIFT;unsigned long charged = 0;unsigned long end = addr + len;unsigned long merge_start = addr, merge_end = end;pgoff_t vm_pgoff;int error;VMA_ITERATOR(vmi, mm, addr);/* Check against address space limit. */if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {unsigned long nr_pages;/** MAP_FIXED may remove pages of mappings that intersects with* requested mapping. Account for the pages it would unmap.*/nr_pages = count_vma_pages_range(mm, addr, end);if (!may_expand_vm(mm, vm_flags,(len >> PAGE_SHIFT) - nr_pages))return -ENOMEM;}/* Unmap any existing mapping in the area */if (do_vmi_munmap(&vmi, mm, addr, len, uf, false))return -ENOMEM;/** Private writable mapping: check memory availability*/if (accountable_mapping(file, vm_flags)) {charged = len >> PAGE_SHIFT;if (security_vm_enough_memory_mm(mm, charged))return -ENOMEM;vm_flags |= VM_ACCOUNT;}next = vma_next(&vmi);prev = vma_prev(&vmi);if (vm_flags & VM_SPECIAL) {if (prev)vma_iter_next_range(&vmi);goto cannot_expand;}/* Attempt to expand an old mapping *//* Check next */if (next && next->vm_start == end && !vma_policy(next) &&can_vma_merge_before(next, vm_flags, NULL, file, pgoff+pglen,NULL_VM_UFFD_CTX, NULL)) {merge_end = next->vm_end;vma = next;vm_pgoff = next->vm_pgoff - pglen;}/* Check prev */if (prev && prev->vm_end == addr && !vma_policy(prev) &&(vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file,pgoff, vma->vm_userfaultfd_ctx, NULL) :can_vma_merge_after(prev, vm_flags, NULL, file, pgoff,NULL_VM_UFFD_CTX, NULL))) {merge_start = prev->vm_start;vma = prev;vm_pgoff = prev->vm_pgoff;} else if (prev) {vma_iter_next_range(&vmi);}/* Actually expand, if possible */if (vma &&!vma_expand(&vmi, vma, merge_start, merge_end, vm_pgoff, next)) {khugepaged_enter_vma(vma, vm_flags);goto expanded;}if (vma == prev)vma_iter_set(&vmi, addr);
/* No mergeable neighbour (or VM_SPECIAL): build a brand-new VMA. */
cannot_expand:/** Determine the object being mapped and call the appropriate* specific mapper. the address has already been validated, but* not unmapped, but the maps are removed from the list.*/vma = vm_area_alloc(mm);if (!vma) {error = -ENOMEM;goto unacct_error;}vma_iter_config(&vmi, addr, end);vma->vm_start = addr;vma->vm_end = end;vm_flags_init(vma, vm_flags);vma->vm_page_prot = vm_get_page_prot(vm_flags);vma->vm_pgoff = pgoff;if (file) {if (vm_flags & VM_SHARED) {error = mapping_map_writable(file->f_mapping);if (error)goto free_vma;}vma->vm_file = get_file(file);error = call_mmap(file, vma);if (error)goto unmap_and_free_vma;/** Expansion is handled above, merging is handled below.* Drivers should not alter the address of the VMA.*/error = -EINVAL;if (WARN_ON((addr != vma->vm_start)))goto close_and_free_vma;vma_iter_config(&vmi, addr, end);/** If vm_flags changed after call_mmap(), we should try merge* vma again as we may succeed this time.*/if (unlikely(vm_flags != vma->vm_flags && prev)) {merge = vma_merge(&vmi, mm, prev, vma->vm_start,vma->vm_end, vma->vm_flags, NULL,vma->vm_file, vma->vm_pgoff, NULL,NULL_VM_UFFD_CTX, NULL);if (merge) {/** ->mmap() can change vma->vm_file and fput* the original file. So fput the vma->vm_file* here or we would add an extra fput for file* and cause general protection fault* ultimately.*/fput(vma->vm_file);vm_area_free(vma);vma = merge;/* Update vm_flags to pick up the change. 
*/vm_flags = vma->vm_flags;goto unmap_writable;}}vm_flags = vma->vm_flags;} else if (vm_flags & VM_SHARED) {error = shmem_zero_setup(vma);if (error)goto free_vma;} else {vma_set_anonymous(vma);}if (map_deny_write_exec(vma, vma->vm_flags)) {error = -EACCES;goto close_and_free_vma;}/* Allow architectures to sanity-check the vm_flags */error = -EINVAL;if (!arch_validate_flags(vma->vm_flags))goto close_and_free_vma;error = -ENOMEM;if (vma_iter_prealloc(&vmi, vma))goto close_and_free_vma;/* Lock the VMA since it is modified after insertion into VMA tree */vma_start_write(vma);vma_iter_store(&vmi, vma);mm->map_count++;if (vma->vm_file) {i_mmap_lock_write(vma->vm_file->f_mapping);if (vma->vm_flags & VM_SHARED)mapping_allow_writable(vma->vm_file->f_mapping);flush_dcache_mmap_lock(vma->vm_file->f_mapping);vma_interval_tree_insert(vma, &vma->vm_file->f_mapping->i_mmap);flush_dcache_mmap_unlock(vma->vm_file->f_mapping);i_mmap_unlock_write(vma->vm_file->f_mapping);}/** vma_merge() calls khugepaged_enter_vma() either, the below* call covers the non-merge case.*/khugepaged_enter_vma(vma, vma->vm_flags);/* Once vma denies write, undo our temporary denial count */
unmap_writable:if (file && vm_flags & VM_SHARED)mapping_unmap_writable(file->f_mapping);file = vma->vm_file;ksm_add_vma(vma);
/* Mapping established (freshly inserted or expanded neighbour). */
expanded:perf_event_mmap(vma);vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);if (vm_flags & VM_LOCKED) {if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||is_vm_hugetlb_page(vma) ||vma == get_gate_vma(current->mm))vm_flags_clear(vma, VM_LOCKED_MASK);elsemm->locked_vm += (len >> PAGE_SHIFT);}if (file)uprobe_mmap(vma);/** New (or expanded) vma always get soft dirty status.* Otherwise user-space soft-dirty page tracker won't* be able to distinguish situation when vma area unmapped,* then new mapped in-place (which must be aimed as* a completely new data area).*/vm_flags_set(vma, VM_SOFTDIRTY);vma_set_page_prot(vma);validate_mm(mm);return addr;close_and_free_vma:if (file && vma->vm_ops && vma->vm_ops->close)vma->vm_ops->close(vma);if (file || vma->vm_file) {
/* Error path: drop the file reference and undo any partial mapping. */
unmap_and_free_vma:fput(vma->vm_file);vma->vm_file = NULL;vma_iter_set(&vmi, vma->vm_end);/* Undo any partial mapping done by a device driver. */unmap_region(mm, &vmi.mas, vma, prev, next, vma->vm_start,vma->vm_end, vma->vm_end, true);}if (file && (vm_flags & VM_SHARED))mapping_unmap_writable(file->f_mapping);
/* Error path: release the VMA itself. */
free_vma:vm_area_free(vma);
/* Error path: return any charged memory accounting. */
unacct_error:if (charged)vm_unacct_memory(charged);validate_mm(mm);return error;
}

http://www.ppmy.cn/ops/166236.html

相关文章

MyBatis框架操作数据库（一）—— xml和动态Sql

目录 配置连接字符串和MyBatis:数据库的连接配置：XML的配置： XML编写Sql:model层：mapper层： 动态Sql:if 标签和trim标签：where标签:Set标签:Foreach标签: Mybatis的开发有两种方式： 注解和XML…

FPGA前端设计适合哪些人学?该怎么学?

FPGA前端设计是一个具有挑战性且薪资待遇优渥的岗位，主要涉及FPGA芯片定义、逻辑结构设计。这个职位要求相关专业的本科及以上学历，并且需要掌握一定的专业技能。工作内容从IP级设计到全芯片（SoC）设计，涉及多个设计层级…

iOS应用程序开发(图片处理器)

续上篇 iOS 编程开发图片浏览器，继续实现一个图标生成功能。 操作系统平台：MacBook(macOS) IDE:xcode 编程语言：Objective-c 以下是小程序在 iPhone 模拟器中的运行视频。也可以在 iPad 模拟器中运行。 效果图如下所示：…

Linux find 命令完全指南

find 是 Linux 系统最强大的文件搜索工具，支持嵌套遍历、条件筛选、执行动作。以下通过场景分类解析核心用法，涵盖高效搜索、文件管理及高级技巧： 一、基础搜索模式 1. 按文件名搜索（精确/模糊匹配） <BASH> f…

洛谷 P1725 琪露诺 单调队列优化的线性dp

以上是题目 考虑到2e5的数据范围，暴力地先枚举i，再枚举走的步数区间j，是过不了的。 我们可以看出对于每一个i，只需要找出能走到i的区间的dp最大值即可，求区间最大值可以使用单调队列，时间复杂度…

江科大51单片机笔记【12】AT24C02(I2C总线)

写在前言 此为博主自学江科大51单片机（B站）的笔记，方便后续重温知识 在后面的章节中，为了防止篇幅过长和易于查找，我把一个小节分成两部分来发，上章节主要是关于本节课的硬件介绍、电路图、原理图等理论知识…

【Linux】UDP协议与TCP协议

目录 一、端口号 （一）端口号划分 （二）端口号相关概念 二、相关指令 （一）pidof （二）netstat 三、UDP协议 （一）UDP协议格式 （二）…

vue 仿deepseek前端开发一个对话界面

后端：调用deepseek的api，所以返回数据格式和deepseek相同 {"model": "DeepSeek-R1-Distill-Qwen-1.5B", "choices": [{"index": 0, "delta": {"role": "assistant", "cont…